Dillo v3.1.1-117-gc0464026
Loading...
Searching...
No Matches
html.cc
Go to the documentation of this file.
1/*
2 * File: html.cc
3 *
4 * Copyright (C) 2005-2007 Jorge Arellano Cid <jcid@dillo.org>
5 * Copyright (C) 2024 Rodrigo Arias Mallo <rodarima@gmail.com>
6 *
7 * This program is free software; you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation; either version 3 of the License, or
10 * (at your option) any later version.
11 */
12
17/*-----------------------------------------------------------------------------
18 * Includes
19 *---------------------------------------------------------------------------*/
20#include <ctype.h> /* for isspace */
21#include <string.h> /* for memcpy and memmove */
22#include <stdlib.h>
23#include <stdio.h> /* for sprintf */
24#include <errno.h>
25
26#include "bw.h" /* for BrowserWindow */
27#include "msg.h"
28#include "binaryconst.h"
29#include "colors.h"
30#include "html_charrefs.h"
31#include "utf8.hh"
32
33#include "misc.h"
34#include "uicmd.hh"
35#include "history.h"
36#include "menu.hh"
37#include "prefs.h"
38#include "capi.h"
39#include "html.hh"
40#include "html_common.hh"
41#include "form.hh"
42#include "table.hh"
43
44#include "dw/textblock.hh"
45#include "dw/bullet.hh"
46#include "dw/listitem.hh"
47#include "dw/image.hh"
48#include "dw/ruler.hh"
49
50/*-----------------------------------------------------------------------------
51 * Defines
52 *---------------------------------------------------------------------------*/
53
54/* Define to 1 to ignore white space immediately after an open tag,
55 * and immediately before a close tag. */
56#define SGML_SPCDEL 0
57
58#define TAB_SIZE 8
59
60/*-----------------------------------------------------------------------------
61 * Name spaces
62 *---------------------------------------------------------------------------*/
63using namespace lout;
64using namespace dw;
65using namespace dw::core;
66using namespace dw::core::ui;
67using namespace dw::core::style;
68
69/*-----------------------------------------------------------------------------
70 * Typedefs
71 *---------------------------------------------------------------------------*/
72class DilloHtml;
73typedef void (*TagOpenFunct) (DilloHtml *html, const char *tag, int tagsize);
74typedef void (*TagCloseFunct) (DilloHtml *html);
75
85
91
92
93/*
94 * Exported function with C linkage.
95 */
96extern "C" {
97void *a_Html_text(const char *type, void *P, CA_Callback_t *Call,void **Data);
98}
99
100/*-----------------------------------------------------------------------------
101 * Forward declarations
102 *---------------------------------------------------------------------------*/
103static int Html_write_raw(DilloHtml *html, char *buf, int bufsize, int Eof);
104static bool Html_load_image(BrowserWindow *bw, DilloUrl *url,
105 const DilloUrl *requester, DilloImage *image);
106static void Html_callback(int Op, CacheClient_t *Client);
107static void Html_tag_cleanup_at_close(DilloHtml *html, int TagIdx);
108int a_Html_tag_index(const char *tag);
109
110/*-----------------------------------------------------------------------------
111 * Local Data
112 *---------------------------------------------------------------------------*/
/*
 * Description of one HTML element: its name, flag bits, end-tag policy and
 * the handler functions associated with it.
 */
114typedef struct {
115 const char *name; /* element name */
116 unsigned char Flags; /* flags (explained near the table data) */
117 char EndTag; /* Is it Required, Optional or Forbidden */
118 TagOpenFunct open; /* Open function */
119 TagOpenFunct content; /* Content function */
120 TagCloseFunct close; /* Close function */
121} TagInfo;
122
123/* Some element indexes required in scattered places */
124static int
142
143
144/*-----------------------------------------------------------------------------
145 *-----------------------------------------------------------------------------
146 * Main Code
147 *-----------------------------------------------------------------------------
148 *---------------------------------------------------------------------------*/
149
/*
 * Append a printf-style formatted message to bw->page_bugs.
 * When bw->num_page_bugs is already non-zero, a newline is appended first.
 *
 * NOTE(review): listing lines 159, 161 and 165 are absent from this copy, so
 * the statement owning the "HTML warning" format string is not visible here.
 */
153void DilloHtml::bugMessage(const char *format, ... )
154{
155 va_list argp;
156
157 if (bw->num_page_bugs)
158 dStr_append_c(bw->page_bugs, '\n');
160 "HTML warning: line %d, ",
 /* Append the caller's own message, expanded with its variadic arguments */
162 va_start(argp, format);
163 dStr_vsprintfa(bw->page_bugs, format, argp);
164 va_end(argp);
166}
167
173 const char *url_str, const char *base_url,
174 int use_base_url)
175{
176 if (!url_str) {
177 MSG("a_Html_url_new: URL is NULL\n");
178 return NULL;
179 }
180
181 DilloUrl *url = a_Url_new(url_str,
182 (use_base_url) ? base_url : URL_STR_(html->base_url));
183
184 if (!url) {
185 BUG_MSG("URL is not valid '%s'.", url_str);
186 return NULL;
187 }
188
189 int n_ic, n_ic_spc;
190 if ((n_ic = URL_ILLEGAL_CHARS(url)) != 0) {
191 const char *suffix = (n_ic) > 1 ? "s" : "";
192 n_ic_spc = URL_ILLEGAL_CHARS_SPC(url);
193 if (n_ic == n_ic_spc) {
194 BUG_MSG("URL has %d illegal space%s ('%s').", n_ic, suffix, url_str);
195 } else if (n_ic_spc == 0) {
196 BUG_MSG("URL has %d illegal byte%s in {00-1F, 7F-FF} range ('%s').",
197 n_ic, suffix, url_str);
198 } else {
199 BUG_MSG("URL has %d illegal byte%s: "
200 "%d space%s and %d in {00-1F, 7F-FF} range ('%s').",
201 n_ic, suffix,
202 n_ic_spc, n_ic_spc > 1 ? "s" : "", n_ic-n_ic_spc, url_str);
203 }
204 }
205 return url;
206}
207
/*
 * Create a DilloHtml parser object for the DilloWeb passed in P (using its
 * bw and url, plus the content type string Type), store it in *Data and
 * return the new object's dw widget as an opaque pointer.
 *
 * NOTE(review): listing line 217 is absent from this copy; the visible lines
 * never write through Call.
 */
211void *a_Html_text(const char *Type, void *P, CA_Callback_t *Call, void **Data)
212{
213 DilloWeb *web = (DilloWeb*)P;
214 DilloHtml *html = new DilloHtml(web->bw, web->url, Type);
215
216 *Data = (void*)html;
218
219 return (void*)html->dw;
220}
221
222static void Html_free(void *data)
223{
224 delete ((DilloHtml*)data);
225}
226
230void a_Html_load_images(void *v_html, DilloUrl *pattern)
231{
232 DilloHtml *html = (DilloHtml*)v_html;
233
234 html->loadImages(pattern);
235}
236
240static bool Html_contains_form(DilloHtml *html, void *v_form)
241{
242 for (int i = 0; i < html->forms->size(); i++) {
243 if (html->forms->get(i) == v_form) {
244 return true;
245 }
246 }
247 return false;
248}
249
253void a_Html_form_submit(void *v_html, void *v_form)
254{
255 DilloHtml *html = (DilloHtml*)v_html;
256
257 if (Html_contains_form(html, v_form)) {
258 /* it's still valid */
259 a_Html_form_submit2(v_form);
260 }
261}
262
266void a_Html_form_reset(void *v_html, void *v_form)
267{
268 DilloHtml *html = (DilloHtml*)v_html;
269
270 if (Html_contains_form(html, v_form)) {
271 /* it's still valid */
272 a_Html_form_reset2(v_form);
273 }
274}
275
279void a_Html_form_display_hiddens(void *v_html, void *v_form, bool_t display)
280{
281 DilloHtml *html = (DilloHtml*)v_html;
282
283 if (Html_contains_form(html, v_form)) {
284 /* it's still valid */
285 a_Html_form_display_hiddens2(v_form, (display != 0));
286 }
287}
288
292static void Html_set_link_coordinates(DilloHtml *html, int link, int x, int y)
293{
294 char data[64];
295
296 if (x != -1) {
297 snprintf(data, 64, "?%d,%d", x, y);
298 a_Url_set_ismap_coords(html->links->get(link), data);
299 }
300}
301
306static int Html_set_new_link(DilloHtml *html, DilloUrl **url)
307{
308 int nl = html->links->size();
309 html->links->increase();
310 html->links->set(nl, (*url) ? *url : NULL);
311 return nl;
312}
313
/*
 * Read the "align" attribute of a tag and map it (ASCII case-insensitively)
 * to a TextAlignType: "left", "right", "center" or "justify". Any other
 * value keeps the initial TEXT_ALIGN_LEFT. For HTML documents with version
 * >= 5.0 a bug message reports the attribute as obsolete.
 *
 * NOTE(review): listing line 354 is absent from this copy, so the call that
 * consumes textAlignType is only partially visible.
 */
318void a_Html_tag_set_align_attr(DilloHtml *html, const char *tag, int tagsize)
319{
320 const char *align;
321
322 if ((align = a_Html_get_attr(html, tag, tagsize, "align"))) {
323 TextAlignType textAlignType = TEXT_ALIGN_LEFT;
324
325 if (html->DocType == DT_HTML && html->DocTypeVersion >= 5.0f)
326 BUG_MSG("The align attribute is obsolete in HTML5.");
327
328 if (dStrAsciiCasecmp (align, "left") == 0)
329 textAlignType = TEXT_ALIGN_LEFT;
330 else if (dStrAsciiCasecmp (align, "right") == 0)
331 textAlignType = TEXT_ALIGN_RIGHT;
332 else if (dStrAsciiCasecmp (align, "center") == 0)
333 textAlignType = TEXT_ALIGN_CENTER;
334 else if (dStrAsciiCasecmp (align, "justify") == 0)
335 textAlignType = TEXT_ALIGN_JUSTIFY;
 /* The block below is compiled out; it sketches ALIGN="char" support. */
336#if 0
337 else if (dStrAsciiCasecmp (align, "char") == 0) {
338 /* TODO: Actually not supported for <p> etc. */
339 v.textAlign = TEXT_ALIGN_STRING;
340 if ((charattr = a_Html_get_attr(html, tag, tagsize, "char"))) {
341 if (charattr[0] == 0)
342 /* TODO: ALIGN=" ", and even ALIGN="&32;" will result in
343 * an empty string (don't know whether the latter is
344 * correct, has to be clarified with the specs), so
345 * that for empty strings, " " is assumed. */
346 style_attrs.textAlignChar = ' ';
347 else
348 style_attrs.textAlignChar = charattr[0];
349 } else
350 /* TODO: Examine LANG attr of <html>. */
351 style_attrs.textAlignChar = '.';
352 }
353#endif
355 textAlignType);
356 }
357}
358
/*
 * Read the "valign" attribute of a tag and map it (ASCII case-insensitively)
 * to a VAlignType: "top", "bottom" or "baseline"; every other value maps to
 * VALIGN_MIDDLE. For HTML documents with version >= 5.0 a bug message
 * reports the attribute as obsolete.
 *
 * Returns true when the attribute is present, false otherwise.
 *
 * NOTE(review): listing line 381 is absent from this copy, so the call that
 * consumes 'valign' is only partially visible.
 */
363bool a_Html_tag_set_valign_attr(DilloHtml *html, const char *tag, int tagsize)
364{
365 const char *attr;
366 VAlignType valign;
367
368 if ((attr = a_Html_get_attr(html, tag, tagsize, "valign"))) {
369 if (html->DocType == DT_HTML && html->DocTypeVersion >= 5.0f)
370 BUG_MSG("The valign attribute is obsolete in HTML5.");
371
372 if (dStrAsciiCasecmp (attr, "top") == 0)
373 valign = VALIGN_TOP;
374 else if (dStrAsciiCasecmp (attr, "bottom") == 0)
375 valign = VALIGN_BOTTOM;
376 else if (dStrAsciiCasecmp (attr, "baseline") == 0)
377 valign = VALIGN_BASELINE;
378 else
379 valign = VALIGN_MIDDLE;
380
382 CSS_TYPE_ENUM, valign);
383 return true;
384 } else
385 return false;
386}
387
388
393static void Html_add_textblock(DilloHtml *html, bool addBreaks, int breakSpace,
394 bool addBreakOpt)
395{
396 Textblock *textblock = new Textblock (prefs.limit_text_width);
397 Style *style;
398
399 if (addBreaks) {
400 StyleAttrs attrs = *(html->style ());
401 attrs.display = DISPLAY_BLOCK;
402 style = Style::create (&attrs);
403 } else {
404 style = html->style ();
405 style->ref ();
406 }
407
408 if (addBreaks)
409 HT2TB(html)->addParbreak (breakSpace, html->wordStyle ());
410
411 HT2TB(html)->addWidget (textblock, style); /* Works also for floats etc. */
412 if (addBreakOpt)
413 HT2TB(html)->addBreakOption (html->style (), false);
414
415 if (addBreaks)
416 HT2TB(html)->addParbreak (breakSpace, html->wordStyle ());
417 S_TOP(html)->textblock = html->dw = textblock;
418 if (addBreaks)
419 S_TOP(html)->hand_over_break = true;
420
421 style->unref ();
422}
423
425{
426 return HT2TB(html)->mustAddBreaks (html->style ());
427}
428
433 const char *content_type)
434{
435 /* Init main variables */
436 bw = p_bw;
437 page_url = a_Url_dup(url);
438 base_url = a_Url_dup(url);
439 dw = NULL;
440
441 /* Init event receiver */
442 linkReceiver.html = this;
443 HT2LT(this)->connectLink (&linkReceiver);
444
445 a_Bw_add_doc(p_bw, this);
446
447 /* Init for-parsing variables */
448 Start_Buf = NULL;
449 Start_Ofs = 0;
450
451 _MSG("DilloHtml(): content type: %s\n", content_type);
452 this->content_type = dStrdup(content_type);
453
454 /* get charset */
456
457 stop_parser = false;
458
459 CurrOfs = OldOfs = 0;
460 OldLine = 1;
461
462 DocType = DT_NONE; /* assume Tag Soup 0.0! :-) */
463 DocTypeVersion = 0.0f;
464
466
467 cssUrls = new misc::SimpleVector <DilloUrl*> (1);
468
469 stack = new misc::SimpleVector <DilloHtmlState> (16);
470 stack->increase();
474 stack->getRef(0)->cell_text_align_set = false;
475 stack->getRef(0)->display_none = false;
477 stack->getRef(0)->list_number = 0;
478 stack->getRef(0)->tag_idx = -1; /* MUST not be used */
479 stack->getRef(0)->textblock = NULL;
480 stack->getRef(0)->table = NULL;
481 stack->getRef(0)->ref_list_item = NULL;
482 stack->getRef(0)->hand_over_break = false;
483
485
486 Stash = dStr_new("");
487 StashSpace = false;
488
489 pre_column = 0;
490 PreFirstChar = false;
491 PrevWasCR = false;
492 InVisitedLink = false;
493 ReqTagClose = false;
494 TagSoup = true;
495 loadCssFromStash = false;
496 PrevWasBodyClose = false;
497 PrevWasHtmlClose = false;
498
500
502
505 visited_color = -1;
506
507 /* Init page-handling variables */
508 forms = new misc::SimpleVector <DilloHtmlForm*> (1);
509 inputs_outside_form = new misc::SimpleVector <DilloHtmlInput*> (1);
510 links = new misc::SimpleVector <DilloUrl*> (64);
511 images = new misc::SimpleVector <DilloHtmlImage*> (16);
512
513 /* Initialize the main widget */
514 initDw();
515 /* Hook destructor to the dw delete call */
516 dw->setDeleteCallback(Html_free, this);
517}
518
523{
524 dReturn_if_fail (dw == NULL);
525
526 /* Create the main widget */
528
529 bw->num_page_bugs = 0;
531}
532
537{
538 _MSG("::~DilloHtml(this=%p)\n", this);
539
541
542 a_Bw_remove_doc(bw, this);
543
546
547 for (int i = 0; i < cssUrls->size(); i++)
549 delete (cssUrls);
550
551 for (int i = 0; i < forms->size(); i++)
553 delete(forms);
554
555 for (int i = 0; i < inputs_outside_form->size(); i++)
557 delete(inputs_outside_form);
558
559 for (int i = 0; i < links->size(); i++)
560 a_Url_free(links->get(i));
561 delete (links);
562
563 for (int i = 0; i < images->size(); i++) {
564 DilloHtmlImage *img = images->get(i);
565 a_Url_free(img->url);
566 a_Image_unref(img->image);
567 dFree(img);
568 }
569 delete (images);
570
571 delete styleEngine;
572}
573
578void DilloHtml::write(char *Buf, int BufSize, int Eof)
579{
580 int token_start;
581 char *buf = Buf + Start_Ofs;
582 int bufsize = BufSize - Start_Ofs;
583
584 _MSG("DilloHtml::write BufSize=%d Start_Ofs=%d\n", BufSize, Start_Ofs);
585#if 0
586 char *aux = dStrndup(Buf, BufSize);
587 MSG(" {%s}\n", aux);
588 dFree(aux);
589#endif
590
591 /* Update Start_Buf. It may be used after the parser is stopped */
592 Start_Buf = Buf;
593
594 dReturn_if (dw == NULL);
595 dReturn_if (stop_parser == true);
596
597 token_start = Html_write_raw(this, buf, bufsize, Eof);
598 Start_Ofs += token_start;
599}
600
606{
607 int i, ofs, line;
608 const char *p = Start_Buf;
609
610 dReturn_val_if_fail(p != NULL, -1);
611 /* Disable line counting for META hack. Buffers differ. */
613
614 ofs = CurrOfs;
615 line = OldLine;
616 for (i = OldOfs; i < ofs; ++i)
617 if (p[i] == '\n' || (p[i] == '\r' && p[i+1] != '\n'))
618 ++line;
619 OldOfs = CurrOfs;
620 OldLine = line;
621 return line;
622}
623
628{
629 delete(stack);
630
634 dFree(charset);
635}
636
/*
 * Finish parsing the document: set the EOF flag, walk the element stack
 * until only its bottom item remains, then reduce InFlags to IN_EOF and
 * close the cache client identified by ClientKey.
 * Does nothing when the parser has already been stopped.
 *
 * NOTE(review): listing line 653 is absent from this copy; it is presumably
 * the statement that shrinks the stack, without which the loop below could
 * not terminate.
 */
641void DilloHtml::finishParsing(int ClientKey)
642{
643 int si;
644
645 dReturn_if (stop_parser == true);
646
647 /* flag we've already parsed up to the last byte */
648 InFlags |= IN_EOF;
649
650 /* force the close of elements left open (TODO: not for XHTML) */
651 while ((si = stack->size() - 1)) {
652 if (stack->getRef(si)->tag_idx != -1) {
654 }
655 }
656
657 /* Nothing left to do with the parser. Clear all flags, except EOF. */
658 InFlags = IN_EOF;
659
660 /* Remove this client from our active list */
661 a_Bw_close_client(bw, ClientKey);
662}
663
668 DilloHtmlEnc enc, const char *charset)
669{
670 // avoid data loss on repush after CSS stylesheets have been loaded
671 bool enabled = bw->NumPendingStyleSheets == 0;
672 DilloHtmlForm *form = a_Html_form_new (this, method, action,
673 enc, charset, enabled);
674 int nf = forms->size ();
675 forms->increase ();
676 forms->set (nf, form);
677 _MSG("Html formNew: action=%s nform=%d\n", action, nf);
678 return forms->size();
679}
680
685{
686 return forms->get (forms->size() - 1);
687}
688
690{
691 for (int i = 0; i < images->size(); i++) {
692 if (images->get(i)->image != NULL) {
693 return TRUE;
694 }
695 }
696 return FALSE;
697}
698
/*
 * Request loading of the document's images that still hold an image object.
 *
 * pattern - when NULL, every such image is requested; otherwise only the
 *           images for which a_Url_cmp(hi->url, pattern) returns zero.
 *
 * NOTE(review): listing line 704 is absent from this copy.
 */
702void DilloHtml::loadImages (const DilloUrl *pattern)
703{
705
706 /* If the user asked for a specific image, the user (NULL) is the requester,
707 * and the domain mechanism will always permit the request. But if the user
708 * just asked for all images (clicking "Load images"), use the page URL as
709 * the requester so that the domain mechanism can act as a filter.
710 * If the possible patterns become more complex, it might be good to have
711 * the caller supply the requester instead.
712 */
713 const DilloUrl *requester = pattern ? NULL : this->page_url;
714
715 for (int i = 0; i < images->size(); i++) {
716 DilloHtmlImage *hi = images->get(i);
717
 /* Entries whose image pointer is NULL are skipped. */
718 if (hi->image) {
719 assert(hi->url);
720 if ((!pattern) || (!a_Url_cmp(hi->url, pattern))) {
721 if (Html_load_image(bw, hi->url, requester, hi->image)) {
722 a_Image_unref (hi->image);
723 hi->image = NULL; // web owns it now
724 }
725 }
726 }
727 }
728}
729
734{
735 int nu = cssUrls->size();
736 cssUrls->increase();
737 cssUrls->set(nu, a_Url_dup(url));
738}
739
/*
 * Link enter/leave notification.
 * link == -1 means the pointer left a link: the status message is cleared.
 * Otherwise the ismap coordinates are stored on the link's URL and the URL
 * string is shown as status message. Always returns true.
 *
 * NOTE(review): listing line 743 is absent from this copy; 'bw' used below
 * is presumably declared there.
 */
740bool DilloHtml::HtmlLinkReceiver::enter (Widget *widget, int link, int img,
741 int x, int y)
742{
744
745 _MSG(" ** ");
746 if (link == -1) {
747 _MSG(" Link LEAVE notify...\n");
748 a_UIcmd_set_msg(bw, "");
749 } else {
750 _MSG(" Link ENTER notify...\n");
751 Html_set_link_coordinates(html, link, x, y);
752 a_UIcmd_set_msg(bw, "%s", URL_STR(html->links->get(link)));
753 }
754 return true;
755}
756
760bool DilloHtml::HtmlLinkReceiver::press (Widget *widget, int link, int img,
761 int x, int y, EventButton *event)
762{
763 BrowserWindow *bw = html->bw;
764 int ret = false;
765 DilloUrl *linkurl = NULL;
766
767 _MSG("pressed button %d\n", event->button);
768 if (event->button == 3) {
769 // popup menus
770 if (img != -1) {
771 // image menu
772 if (link != -1)
773 linkurl = html->links->get(link);
774 const bool_t loaded_img = (html->images->get(img)->image == NULL);
775 a_UIcmd_image_popup(bw, html->images->get(img)->url, loaded_img,
776 html->page_url, linkurl);
777 ret = true;
778 } else {
779 if (link == -1) {
780 a_UIcmd_page_popup(bw, bw->num_page_bugs != 0, html->cssUrls);
781 ret = true;
782 } else {
783 a_UIcmd_link_popup(bw, html->links->get(link));
784 ret = true;
785 }
786 }
787 }
788 return ret;
789}
790
/*
 * Click handler for links and images.
 *
 * A button-1 click on an image that still holds an image object loads every
 * image sharing its URL and returns true. Otherwise, when a link is under
 * the pointer, its ismap coordinates are stored, button 1 opens the URL,
 * button 2 opens it in a new tab (Shift inverts the focus preference), and
 * the link is recoloured with html->visited_color in the nearest enclosing
 * Textblock.
 *
 * NOTE(review): listing lines 818 and 823 are absent from this copy, which
 * is why the if/else chain around line 822 looks unbalanced here.
 */
794bool DilloHtml::HtmlLinkReceiver::click (Widget *widget, int link, int img,
795 int x, int y, EventButton *event)
796{
797 BrowserWindow *bw = html->bw;
798
799 if ((img != -1) && (html->images->get(img)->image)) {
800 // clicked an image that has not already been loaded
801 if (event->button == 1){
802 // load all instances of this image
803 DilloUrl *pattern = html->images->get(img)->url;
804 html->loadImages(pattern);
805 return true;
806 }
807 }
808
809 if (link != -1) {
810 DilloUrl *url = html->links->get(link);
811 _MSG("clicked on URL %d: %s\n", link, a_Url_str (url));
812
813 Html_set_link_coordinates(html, link, x, y);
814
815 if (event->button == 1) {
816 a_UIcmd_open_url(bw, url);
817 } else if (event->button == 2) {
819 int focus = prefs.focus_new_tab ? 1 : 0;
820 if (event->state == SHIFT_MASK) focus = !focus;
821 a_UIcmd_open_url_nt(bw, url, focus);
822 } else
824 } else {
825 return false;
826 }
827
828 /* Change the link color to "visited" as visual feedback */
 /* Walk up the widget tree and stop at the first Textblock found. */
829 for (Widget *w = widget; w; w = w->getParent()) {
830 _MSG(" ->%s\n", w->getClassName());
831 if (w->instanceOf(dw::Textblock::CLASS_ID)) {
832 ((Textblock*)w)->changeLinkColor (link, html->visited_color);
833 break;
834 }
835 }
836 }
837 return true;
838}
839
844{
845 S_TOP(html)->parse_mode = DILLO_HTML_PARSE_MODE_STASH;
846 html->StashSpace = false;
847 dStr_truncate(html->Stash, 0);
848}
849
/*
 * Map the code points 145..151 to plain replacements:
 * 145/146 -> apostrophe, 147/148 -> double quote, 149 -> 176,
 * 150/151 -> hyphen. Any other value is returned unchanged.
 */
static int Html_ms_stupid_quotes_2ucs(int codepoint)
{
   if (codepoint == 145 || codepoint == 146)
      return '\'';
   if (codepoint == 147 || codepoint == 148)
      return '"';
   if (codepoint == 149)
      return 176;
   if (codepoint == 150 || codepoint == 151)
      return '-';

   return codepoint;
}
871
876static const char *Html_parse_numeric_charref(DilloHtml *html, char *tok,
877 bool_t is_attr, int *entsize)
878{
879 static char buf[5];
880 char *s = tok;
881 int n, codepoint = -1;
882
883 errno = 0;
884
885 if (*s == 'x' || *s == 'X') {
886 if (isxdigit(*++s)) {
887 /* strtol with base 16 accepts leading "0x" - we don't */
888 if (*s == '0' && s[1] == 'x') {
889 s++;
890 codepoint = 0;
891 } else {
892 codepoint = strtol(s, &s, 16);
893 }
894 }
895 } else if (isdigit(*s)) {
896 codepoint = strtol(s, &s, 10);
897 }
898 if (errno)
899 codepoint = -1;
900
901 if (*s == ';')
902 s++;
903 else {
904 if (prefs.show_extra_warnings && (html->DocType == DT_XHTML ||
905 (html->DocType == DT_HTML && html->DocTypeVersion <= 4.01f))) {
906 char c = *s;
907 *s = '\0';
908 BUG_MSG("Character reference '&#%s' lacks ';'.", tok);
909 *s = c;
910 }
911 /* Don't require ';' for old HTML, except that our current heuristic
912 * is to require it in attributes to avoid cases like "&copy=1" found
913 * in URLs.
914 */
915 if (is_attr || html->DocType == DT_XHTML ||
916 (html->DocType == DT_HTML && html->DocTypeVersion >= 5.0f)) {
917 return NULL;
918 }
919
920 }
921 if ((codepoint < 0x20 && codepoint != '\t' && codepoint != '\n' &&
922 codepoint != '\f') ||
923 (codepoint >= 0x7f && codepoint <= 0x9f) ||
924 (codepoint >= 0xd800 && codepoint <= 0xdfff) || codepoint > 0x10ffff ||
925 ((codepoint & 0xfffe) == 0xfffe) ||
926 (!(html->DocType == DT_HTML && html->DocTypeVersion >= 5.0f) &&
927 codepoint > 0xffff)) {
928 /* this catches null bytes, errors, codes out of range, disallowed
929 * control chars, permanently undefined chars, and surrogates.
930 */
931 char c = *s;
932 *s = '\0';
933 BUG_MSG("Numeric character reference '&#%s' is not valid.", tok);
934 *s = c;
935
936 codepoint = (codepoint >= 145 && codepoint <= 151) ?
937 Html_ms_stupid_quotes_2ucs(codepoint) : -1;
938 }
939 if (codepoint != -1) {
940 if (codepoint >= 128) {
941 n = a_Utf8_encode(codepoint, buf);
942 } else {
943 n = 1;
944 buf[0] = (char) codepoint;
945 }
946 assert(n < 5);
947 buf[n] = '\0';
948 *entsize = s-tok+2;
949 return buf;
950 } else {
951 return NULL;
952 }
953}
954
958static int Html_charref_comp(const void *a, const void *b)
959{
960 return strcmp(((Charref_t *)a)->ref, ((Charref_t *)b)->ref);
961}
962
967{
968 Charref_t RefKey;
969
970 RefKey.ref = key;
971 return (Charref_t*) bsearch(&RefKey, Charrefs, NumRef,
973}
974
979static const char *Html_parse_named_charref(DilloHtml *html, char *tok,
980 bool_t is_attr, int *entsize)
981{
982 Charref_t *p;
983 char c;
984 char *s = tok;
985 const char *ret = NULL;
986
987 while (*++s && (isalnum(*s) || strchr(":_.-", *s))) ;
988 c = *s;
989 *s = '\0';
990 if (c != ';') {
991 if (prefs.show_extra_warnings && (html->DocType == DT_XHTML ||
992 (html->DocType == DT_HTML && html->DocTypeVersion <= 4.01f)))
993 BUG_MSG("Character reference '&%s' lacks ';'.", tok);
994
995 /* Don't require ';' for old HTML, except that our current heuristic
996 * is to require it in attributes to avoid cases like "&copy=1" found
997 * in URLs.
998 */
999 if (is_attr || html->DocType == DT_XHTML ||
1000 (html->DocType == DT_HTML && html->DocTypeVersion >= 5.0f)) {
1001 return ret;
1002 }
1003 }
1004
1005 if ((p = Html_charref_search(tok))) {
1006 ret = (html->DocType == DT_HTML && html->DocTypeVersion >= 5.0f) ?
1007 p->html5_str : p->html4_str;
1008 }
1009
1010 if (!ret && html->DocType == DT_XHTML && !strcmp(tok, "apos"))
1011 ret = "'";
1012
1013 *s = c;
1014 if (c == ';')
1015 s++;
1016
1017 if (!ret) {
1018 c = *s;
1019 *s = '\0';
1020 BUG_MSG("Undefined character reference '&%s'.", tok);
1021 *s = c;
1022 }
1023 *entsize = s-tok+1;
1024 return ret;
1025}
1026
1035static const char *Html_parse_entity(DilloHtml *html, const char *token,
1036 int toksize, int *entsize, bool_t is_attr)
1037{
1038 const char *ret = NULL;
1039 char *tok;
1040
1041 if (toksize > 50) {
1042 /* In pathological cases, attributes can be megabytes long and filled
1043 * with character references. As of HTML5, the longest defined character
1044 * reference is about 32 bytes long.
1045 */
1046 toksize = 50;
1047 }
1048
1049 token++;
1050 tok = dStrndup(token, (uint_t)toksize);
1051
1052 if (*tok == '#') {
1053 ret = Html_parse_numeric_charref(html, tok+1, is_attr, entsize);
1054 } else if (isalpha(*tok)) {
1055 ret = Html_parse_named_charref(html, tok, is_attr, entsize);
1056 } else if (prefs.show_extra_warnings &&
1057 (!(html->DocType == DT_HTML && html->DocTypeVersion >= 5.0f))) {
1058 // HTML5 doesn't mind literal '&'s.
1059 BUG_MSG("Literal '&'.");
1060 }
1061 dFree(tok);
1062
1063 return ret;
1064}
1065
1070char *a_Html_parse_entities(DilloHtml *html, const char *token, int toksize)
1071{
1072 const char *esc_set = "&";
1073 int i, s, entsize;
1074 char *str;
1075
1076 s = strcspn(token, esc_set);
1077 if (s >= toksize) {
1078 /* no ampersands */
1079 str = dStrndup(token, toksize);
1080 } else {
1081 Dstr *ds = dStr_sized_new(toksize);
1082
1083 dStr_append_l(ds, token, s);
1084
1085 for (i = s; i < toksize; i++) {
1086 const char *entstr;
1087 const bool_t is_attr = FALSE;
1088
1089 if (token[i] == '&' &&
1090 (entstr = Html_parse_entity(html, token+i, toksize-i, &entsize,
1091 is_attr))) {
1092 dStr_append(ds, entstr);
1093 i += entsize-1;
1094 } else {
1095 dStr_append_c(ds, token[i]);
1096 }
1097 }
1098 str = ds->str;
1099 dStr_free(ds, 0);
1100 }
1101 return str;
1102}
1103
1108static void Html_process_space_pre_line(DilloHtml *html, const char *space,
1109 int spacesize)
1110{
1111 int i, breakCnt = 0;
1112
1113 for (i = 0; i < spacesize; i++) {
1114 /* Support for "\r", "\n" and "\r\n" line breaks */
1115 if (space[i] == '\r' || (space[i] == '\n' && !html->PrevWasCR)) {
1116 breakCnt++;
1117 html->PrevWasCR = (space[i] == '\r');
1118
1119 HT2TB(html)->addLinebreak (html->wordStyle ());
1120 }
1121 }
1122 if (breakCnt == 0) {
1123 HT2TB(html)->addSpace(html->wordStyle ());
1124 }
1125}
1126
/*
 * Handle a run of white space according to the current parse mode:
 *  - display_none: nothing is done;
 *  - STASH: only remember (StashSpace) that a space is pending, and only
 *    when the stash already has content;
 *  - VERBATIM: the run is appended to the stash as is;
 *  - PRE: line breaks become line breaks, tabs are expanded to the next
 *    multiple of TAB_SIZE columns, other characters count as one space;
 *  - otherwise: a single space is added (or, with white-space: pre-line,
 *    Html_process_space_pre_line() decides).
 *
 * NOTE(review): listing line 1171 is absent from this copy; it is presumably
 * the condition guarding the TAB bug message.
 */
1130static void Html_process_space(DilloHtml *html, const char *space,
1131 int spacesize)
1132{
1133 char *spc;
1134 int i, offset;
1135 DilloHtmlParseMode parse_mode = S_TOP(html)->parse_mode;
1136
1137 if (S_TOP(html)->display_none) {
1138 /* do nothing */
1139 } else if (parse_mode == DILLO_HTML_PARSE_MODE_STASH) {
1140 html->StashSpace = (html->Stash->len > 0);
1141
1142 } else if (parse_mode == DILLO_HTML_PARSE_MODE_VERBATIM) {
1143 dStr_append_l(html->Stash, space, spacesize);
1144
1145 } else if (parse_mode == DILLO_HTML_PARSE_MODE_PRE) {
1146 int spaceCnt = 0;
1147
1148 /* re-scan the string for characters that cause line breaks */
1149 for (i = 0; i < spacesize; i++) {
1150 /* Support for "\r", "\n" and "\r\n" line breaks (skips the first) */
1151 if (!html->PreFirstChar &&
1152 (space[i] == '\r' || (space[i] == '\n' && !html->PrevWasCR))) {
1153
 /* Flush the spaces collected so far before breaking the line. */
1154 if (spaceCnt) {
1155 spc = dStrnfill(spaceCnt, ' ');
1156 HT2TB(html)->addText (spc, spaceCnt, html->wordStyle ());
1157 dFree(spc);
1158 spaceCnt = 0;
1159 }
1160 HT2TB(html)->addLinebreak (html->wordStyle ());
1161 html->pre_column = 0;
1162 }
1163 html->PreFirstChar = false;
1164
1165 /* cr and lf should not be rendered -- they appear as a break */
1166 switch (space[i]) {
1167 case '\r':
1168 case '\n':
1169 break;
1170 case '\t':
1172 BUG_MSG("TAB character inside <pre>.");
 /* Advance to the next tab stop (multiple of TAB_SIZE). */
1173 offset = TAB_SIZE - html->pre_column % TAB_SIZE;
1174 spaceCnt += offset;
1175 html->pre_column += offset;
1176 break;
1177 default:
1178 spaceCnt++;
1179 html->pre_column++;
1180 break;
1181 }
1182
1183 html->PrevWasCR = (space[i] == '\r');
1184 }
1185
1186 if (spaceCnt) {
1187 // add break possibility for the white-space:pre-wrap case
1188 HT2TB(html)->addBreakOption (html->wordStyle (), false);
1189 spc = dStrnfill(spaceCnt, ' ');
1190 HT2TB(html)->addText (spc, spaceCnt, html->wordStyle ());
1191 dFree(spc);
1192 }
1193
1194 } else {
1195 if (SGML_SPCDEL) {
1196 /* SGML_SPCDEL ignores white space immediately after an open tag */
1197 } else if (html->wordStyle ()->whiteSpace == WHITE_SPACE_PRE_LINE) {
1198 Html_process_space_pre_line(html, space, spacesize);
1199 } else {
1200 HT2TB(html)->addSpace(html->wordStyle ());
1201 }
1202
1203 if (parse_mode == DILLO_HTML_PARSE_MODE_STASH_AND_BODY)
1204 html->StashSpace = (html->Stash->len > 0);
1205 }
1206}
1207
/*
 * Handle a word ('size' bytes at 'word') according to the current parse
 * mode. Nothing is done while display_none is set.
 *  - Stash modes: character references are expanded and the result is
 *    appended to html->Stash, preceded by a space when one is pending.
 *  - VERBATIM: the word is appended to the stash untouched.
 *  - PRE: references are expanded and the result is split into white-space
 *    runs (handed to Html_process_space) and text runs.
 *  - Otherwise: references are expanded, white space inside the word is
 *    collapsed, and the text is added with break options after ideographic
 *    characters and at zero-width spaces.
 * A bug message is issued for content found after </html> or </body>.
 *
 * NOTE(review): listing line 1230 is absent from this copy; it holds the
 * rest of the condition that starts on line 1229.
 */
1216static void Html_process_word(DilloHtml *html, const char *word, int size)
1217{
1218 int i, j, start;
1219 char *Pword;
1220 DilloHtmlParseMode parse_mode = S_TOP(html)->parse_mode;
1221
1222 if (S_TOP(html)->display_none)
1223 return;
1224 if ((i = html->PrevWasHtmlClose ? 1 : html->PrevWasBodyClose ? 2 : 0)) {
1225 BUG_MSG("Content after </%s> tag.", i == 1 ? "html" : "body");
1226 html->PrevWasHtmlClose = html->PrevWasBodyClose = false;
1227 }
1228
1229 if (parse_mode == DILLO_HTML_PARSE_MODE_STASH ||
1231 if (html->StashSpace) {
1232 dStr_append_c(html->Stash, ' ');
1233 html->StashSpace = false;
1234 }
1235 Pword = a_Html_parse_entities(html, word, size);
1236 dStr_append(html->Stash, Pword);
1237 dFree(Pword);
1238
1239 } else if (parse_mode == DILLO_HTML_PARSE_MODE_VERBATIM) {
1240 /* word goes in untouched, it is not processed here. */
1241 dStr_append_l(html->Stash, word, size);
1242 }
1243
1244 if (parse_mode == DILLO_HTML_PARSE_MODE_STASH ||
1245 parse_mode == DILLO_HTML_PARSE_MODE_VERBATIM) {
1246 /* skip until the closing instructions */
1247
1248 } else if (parse_mode == DILLO_HTML_PARSE_MODE_PRE) {
1249 /* all this overhead is to catch white-space entities */
1250 Pword = a_Html_parse_entities(html, word, size);
1251 for (start = i = 0; Pword[i]; start = i)
1252 if (isspace(Pword[i])) {
1253 while (Pword[++i] && isspace(Pword[i])) ;
1254 Html_process_space(html, Pword + start, i - start);
1255 } else {
1256 while (Pword[++i] && !isspace(Pword[i])) ;
1257 HT2TB(html)->addText(Pword + start, i - start, html->wordStyle ());
1258 html->pre_column += i - start;
1259 html->PreFirstChar = false;
1260 }
1261 dFree(Pword);
1262
1263 } else {
1264 const char *word2, *beyond_word2;
1265
1266 Pword = NULL;
1267 if (!memchr(word,'&', size)) {
1268 /* No entities */
1269 word2 = word;
1270 beyond_word2 = word + size;
1271 } else {
1272 /* Collapse white-space entities inside the word (except &nbsp;) */
1273 Pword = a_Html_parse_entities(html, word, size);
1274 /* Collapse adjacent " \t\f\n\r" characters into a single space */
 /* i is the write index, j the read index; both walk the same buffer. */
1275 for (i = j = 0; (Pword[i] = Pword[j]); ++i, ++j) {
1276 if (strchr(" \t\f\n\r", Pword[i])) {
1277 if (i == 0 || (i > 0 && Pword[i-1] != ' '))
1278 Pword[i] = ' ';
1279 else
1280 for (--i; Pword[j+1] && strchr(" \t\f\n\r", Pword[j+1]); ++j)
1281 ;
1282 }
1283 }
1284 word2 = Pword;
1285 beyond_word2 = word2 + strlen(word2);
1286 }
1287 for (start = i = 0; word2[i]; start = i) {
1288 int len;
1289
1290 if (isspace(word2[i])) {
1291 while (word2[++i] && isspace(word2[i])) ;
1292 Html_process_space(html, word2 + start, i - start);
1293 } else if (!strncmp(word2+i, utf8_zero_width_space, 3)) {
1294 i += 3;
1295 HT2TB(html)->addBreakOption(html->wordStyle (), false);
1296 } else if (a_Utf8_ideographic(word2+i, beyond_word2, &len)) {
1297 i += len;
1298 HT2TB(html)->addText(word2 + start, i - start, html->wordStyle ());
1299 HT2TB(html)->addBreakOption(html->wordStyle (), false);
1300 } else {
 /* NOTE(review): 'len' is used here as left by the a_Utf8_ideographic()
  * call in the condition above -- presumably it is set even when that
  * call returns false; confirm against its implementation. */
1301 do {
1302 i += len;
1303 } while (word2[i] && !isspace(word2[i]) &&
1304 strncmp(word2+i, utf8_zero_width_space, 3) &&
1305 (!a_Utf8_ideographic(word2+i, beyond_word2, &len)));
1306 HT2TB(html)->addText(word2 + start, i - start, html->wordStyle ());
1307 }
1308 }
 /* Pword is only non-NULL when entities were expanded above. */
1309 if (Pword == word2)
1310 dFree(Pword);
1311 }
1312}
1313
1318static bool Html_match_tag(const char *tagstr, char *tag, int tagsize)
1319{
1320 int i;
1321
1322 for (i = 0; i < tagsize && tagstr[i] != '\0'; i++) {
1323 if (D_ASCII_TOLOWER(tagstr[i]) != D_ASCII_TOLOWER(tag[i]))
1324 return false;
1325 }
1326 /* The test for '/' is for xml compatibility: "empty/>" will be matched. */
1327 if (i < tagsize && (isspace(tag[i]) || tag[i] == '>' || tag[i] == '/'))
1328 return true;
1329 return false;
1330}
1331
1336static void Html_eventually_pop_dw(DilloHtml *html, bool hand_over_break)
1337{
1338 if (html->dw != S_TOP(html)->textblock) {
1339 if (hand_over_break)
1340 HT2TB(html)->handOverBreak (html->style ());
1341 HT2TB(html)->flush ();
1342 html->dw = S_TOP(html)->textblock;
1343 }
1344}
1345
1349static void Html_push_tag(DilloHtml *html, int tag_idx)
1350{
1351 int n_items;
1352
1353 n_items = html->stack->size ();
1354 html->stack->increase ();
1355 /* We'll copy the former stack item and just change the tag and its index
1356 * instead of copying all fields except for tag. --Jcid */
1357 *html->stack->getRef(n_items) = *html->stack->getRef(n_items - 1);
1358 html->stack->getRef(n_items)->tag_idx = tag_idx;
1359 html->dw = S_TOP(html)->textblock;
1360}
1361
1366static void Html_force_push_tag(DilloHtml *html, int tag_idx)
1367{
1368 html->startElement (tag_idx);
1369 Html_push_tag(html, tag_idx);
1370}
1371
/*
 * Remove the top item from the parsing stack.
 * NOTE(review): the line declaring this function (name and parameters) is
 * missing from this listing; the body only shows that it works on an
 * object called 'html'. Restore the declaration from the original file.
 */
1376{
1377 bool hand_over_break;
1378
/* The style engine is told first that the top element has ended. */
1379 html->styleEngine->endElement (S_TOP(html)->tag_idx);
/* Read the flag before the stack shrinks; it is needed afterwards. */
1380 hand_over_break = S_TOP(html)->hand_over_break;
1381 html->stack->setSize (html->stack->size() - 1);
1382 Html_eventually_pop_dw(html, hand_over_break);
1383}
1384
1385
1386
1387/*
1388 * Some parsing routines.
1389 */
1390
1391/*
1392 * Used by a_Html_parse_length
1393 */
/*
 * Parse a leading number from 'attr' with strtod() and look at the
 * character that follows it: '%' and '*' are consumed, anything else is
 * left in place. When 'endptr' is not NULL it receives the position where
 * parsing stopped. The function returns the CssLength 'l'.
 * NOTE(review): the first line of the declaration and the statements that
 * assign 'l' in each switch case are missing from this listing (the
 * embedded numbering skips 1394, 1405, 1410 and 1419).
 */
1395 char **endptr)
1396{
1397 CssLength l;
1398 double v;
1399 char *end;
1400
1401 v = strtod (attr, &end);
1402 switch (*end) {
1403 case '%':
1404 end++;
1406 break;
1407
1408 case '*':
1409 end++;
1411 break;
1412/*
1413 The "px" suffix seems not allowed by HTML4.01 SPEC.
1414 case 'p':
1415 if (end[1] == 'x')
1416 end += 2;
1417*/
1418 default:
1420 break;
1421 }
1422
1423 if (endptr)
1424 *endptr = end;
1425 return l;
1426}
1427
1428
/*
 * Parse the attribute value 'attr' as a length and return it.
 * The value is first run through Html_parse_length_or_multi_length();
 * what follows the parsed part may only be whitespace, otherwise a
 * "Garbage after length" bug message is emitted.
 * NOTE(review): three lines are missing from this listing (1439, 1441 and
 * 1446): the condition that guards the "not allowed as &Length;" case and
 * the statements executed in both error branches.
 */
1433CssLength a_Html_parse_length (DilloHtml *html, const char *attr)
1434{
1435 CssLength l;
1436 char *end;
1437
1438 l = Html_parse_length_or_multi_length (attr, &end);
1440 /* not allowed as &Length; */
1442 else {
1443 /* allow only whitespaces */
1444 if (*end && !isspace (*end)) {
1445 BUG_MSG("Garbage after length: '%s'.", attr);
1447 }
1448 }
1449
1450 _MSG("a_Html_parse_length: \"%s\" %d\n", attr, CSS_LENGTH_VALUE(l));
1451 return l;
1452}
1453
1458int32_t a_Html_color_parse(DilloHtml *html, const char *str,
1459 int32_t default_color)
1460{
1461 int err = 1;
1462 int32_t color = a_Color_parse(str, default_color, &err);
1463
1464 if (err) {
1465 BUG_MSG("Color \"%s\" is not in \"#RRGGBB\" format.", str);
1466 }
1467 return color;
1468}
1469
1475static int
1476 Html_check_name_val(DilloHtml *html, const char *val, const char *attrname)
1477{
1478 if (html->DocType == DT_HTML && html->DocTypeVersion >= 5.0f) {
1479 bool valid = *val && !strchr(val, ' ');
1480
1481 if (!valid) {
1482 BUG_MSG("'%s' value \"%s\" must not be empty and must not contain "
1483 "spaces.", attrname, val);
1484 }
1485 return valid ? 1 : 0;
1486 } else {
1487 int i;
1488
1489 for (i = 0; val[i]; ++i)
1490 if (!d_isascii(val[i]) || !(isalnum(val[i]) || strchr(":_.-", val[i])))
1491 break;
1492
1493 if (val[i] || !(d_isascii(val[0]) && isalpha(val[0])))
1494 BUG_MSG("%s attribute value \"%s\" is not of the form "
1495 "'[A-Za-z][A-Za-z0-9:_.-]*'.", attrname, val);
1496
1497 return !(val[i]);
1498 }
1499}
1500
1520static void Html_parse_doctype(DilloHtml *html, const char *tag, int tagsize)
1521{
1522 static const char HTML_SGML_sig [] = "<!DOCTYPE HTML PUBLIC ";
1523 static const char HTML20 [] = "-//IETF//DTD HTML";
1524 static const char HTML32 [] = "-//W3C//DTD HTML 3.2";
1525 static const char HTML40 [] = "-//W3C//DTD HTML 4.0";
1526 static const char HTML401 [] = "-//W3C//DTD HTML 4.01";
1527 static const char HTML401_url[] = "http://www.w3.org/TR/html4/";
1528 static const char XHTML1 [] = "-//W3C//DTD XHTML 1.0";
1529 static const char XHTML1_url [] = "http://www.w3.org/TR/xhtml1/DTD/";
1530 static const char XHTML11 [] = "-//W3C//DTD XHTML 1.1";
1531 static const char XHTML11_url[] = "http://www.w3.org/TR/xhtml11/DTD/";
1532
1533 size_t i;
1534 int quote;
1535 char *p, *ntag = dStrndup(tag, tagsize);
1536
1537 /* Tag sanitization: Collapse whitespace between tokens
1538 * and replace '\n' and '\r' with ' ' inside quoted strings. */
1539 for (i = 0, p = ntag; *p; ++p) {
1540 if (isspace(*p)) {
1541 for (ntag[i++] = ' '; isspace(p[1]); ++p) ;
1542 } else if ((quote = *p) == '"' || *p == '\'') {
1543 for (ntag[i++] = *p++; (ntag[i] = *p) && ntag[i++] != quote; ++p) {
1544 if (*p == '\n' || *p == '\r')
1545 ntag[i - 1] = ' ';
1546 p += (p[0] == '\r' && p[1] == '\n') ? 1 : 0;
1547 }
1548 } else {
1549 ntag[i++] = *p;
1550 }
1551 if (!*p)
1552 break;
1553 }
1554 ntag[i] = 0;
1555
1556 _MSG("New: {%s}\n", ntag);
1557
1558 if (html->DocType != DT_NONE)
1559 BUG_MSG("Multiple DOCTYPE declarations.");
1560
1561 /* The default DT_NONE type is TagSoup */
1562 if (i > strlen(HTML_SGML_sig) && // avoid out of bounds reads!
1563 !dStrnAsciiCasecmp(ntag, HTML_SGML_sig, strlen(HTML_SGML_sig))) {
1564 p = ntag + strlen(HTML_SGML_sig) + 1;
1565 if (!strncmp(p, HTML401, strlen(HTML401)) &&
1566 dStriAsciiStr(p + strlen(HTML401), HTML401_url)) {
1567 html->DocType = DT_HTML;
1568 html->DocTypeVersion = 4.01f;
1569 } else if (!strncmp(p, XHTML1, strlen(XHTML1)) &&
1570 dStriAsciiStr(p + strlen(XHTML1), XHTML1_url)) {
1571 html->DocType = DT_XHTML;
1572 html->DocTypeVersion = 1.0f;
1573 } else if (!strncmp(p, XHTML11, strlen(XHTML11)) &&
1574 dStriAsciiStr(p + strlen(XHTML11), XHTML11_url)) {
1575 html->DocType = DT_XHTML;
1576 html->DocTypeVersion = 1.1f;
1577 } else if (!strncmp(p, HTML40, strlen(HTML40))) {
1578 html->DocType = DT_HTML;
1579 html->DocTypeVersion = 4.0f;
1580 } else if (!strncmp(p, HTML32, strlen(HTML32))) {
1581 html->DocType = DT_HTML;
1582 html->DocTypeVersion = 3.2f;
1583 } else if (!strncmp(p, HTML20, strlen(HTML20))) {
1584 html->DocType = DT_HTML;
1585 html->DocTypeVersion = 2.0f;
1586 }
1587 } else if (!dStrAsciiCasecmp(ntag, "<!DOCTYPE html>") ||
1588 !dStrAsciiCasecmp(ntag, "<!DOCTYPE html >") ||
1589 !dStrAsciiCasecmp(ntag,
1590 "<!DOCTYPE html SYSTEM \"about:legacy-compat\">") ||
1591 !dStrAsciiCasecmp(ntag,
1592 "<!DOCTYPE html SYSTEM 'about:legacy-compat'>")) {
1593 html->DocType = DT_HTML;
1594 html->DocTypeVersion = 5.0f;
1595 }
1596 if (html->DocType == DT_NONE) {
1597 html->DocType = DT_UNRECOGNIZED;
1598 BUG_MSG("DOCTYPE not recognized: ('%s').", ntag);
1599 }
1600 dFree(ntag);
1601}
1602
1606static void Html_tag_open_html(DilloHtml *html, const char *tag, int tagsize)
1607{
1608 /* The IN_HTML flag will be kept set until at IN_EOF condition.
1609 * This allows to handle pages with multiple or uneven HTML tags */
1610
1611 if (!(html->InFlags & IN_HTML))
1612 html->InFlags |= IN_HTML;
1613 if (html->Num_HTML < UCHAR_MAX)
1614 ++html->Num_HTML;
1615
1616 if (html->Num_HTML > 1) {
1617 BUG_MSG("<html> was already open.");
1618 html->ReqTagClose = true;
1619 }
1620}
1621
/*
 * Close handler for <html>: it only logs html->Num_HTML (through the
 * _MSG macro) and changes no state.
 * NOTE(review): the declaration line of this function is missing from
 * this listing; the name in the log string suggests Html_tag_close_html.
 */
1626{
1627 _MSG("Html_tag_close_html: Num_HTML=%d\n", html->Num_HTML);
1628
1629 /* As some Tag soup pages use multiple HTML tags, this function
1630 * gets called only on EOF and upon an extra HTML open.
1631 * Also, we defer clearing the IN_HTML flag until IN_EOF */
1632}
1633
1637static void Html_tag_open_head(DilloHtml *html, const char *tag, int tagsize)
1638{
1639 if (html->InFlags & IN_BODY) {
1640 BUG_MSG("<head> must go before the BODY section.");
1641 html->ReqTagClose = true;
1642 return;
1643 }
1644
1645 if (html->Num_HEAD < UCHAR_MAX)
1646 ++html->Num_HEAD;
1647 if (html->InFlags & IN_HEAD) {
1648 BUG_MSG("<head> was already open.");
1649 html->ReqTagClose = true;
1650 } else if (html->Num_HEAD > 1) {
1651 BUG_MSG("<head> already finished -- ignoring.");
1652 html->ReqTagClose = true;
1653 } else {
1654 html->InFlags |= IN_HEAD;
1655 }
1656}
1657
/*
 * Close handler for the head section.
 * When IN_HEAD is set and exactly one <head> was counted, a missing
 * <title> is reported, IN_HEAD is cleared and every URL stored in
 * html->cssUrls is passed to a_Html_load_stylesheet(). When more than one
 * <head> was counted, the counter is decremented instead.
 * NOTE(review): the declaration line of this function is missing from
 * this listing.
 */
1663{
1664 if (html->InFlags & IN_HEAD) {
1665 if (html->Num_HEAD == 1) {
1666 /* match for the well formed start of HEAD section */
1667 if (html->Num_TITLE == 0)
1668 BUG_MSG("<head> lacks <title>.");
1669
1670 html->InFlags &= ~IN_HEAD;
1671
1672 /* charset is already set, load remote stylesheets now */
1673 for (int i = 0; i < html->cssUrls->size(); i++) {
1674 a_Html_load_stylesheet(html, html->cssUrls->get(i));
1675 }
1676 } else if (html->Num_HEAD > 1) {
1677 --html->Num_HEAD;
1678 }
1679 } else {
1680 /* not reached, see Html_tag_cleanup_at_close() */
1681 }
1682}
1683
1688static void Html_tag_open_title(DilloHtml *html, const char *tag, int tagsize)
1689{
1690 /* fill the stash buffer so TITLE content can be ignored
1691 * when not valid, redundant or outside HEAD section */
1692 a_Html_stash_init(html);
1693
1694 if (html->InFlags & IN_HEAD) {
1695 if (html->Num_TITLE < UCHAR_MAX)
1696 ++html->Num_TITLE;
1697 if (html->Num_TITLE > 1)
1698 BUG_MSG("Redundant <title>.");
1699 } else {
1700 BUG_MSG("<title> must be inside <head> -- ignoring.");
1701 }
1702}
1703
/*
 * Close handler for <title>: when inside <head> and this is the first
 * title, the stashed text (if not empty) is set as the page title of the
 * browser window.
 * NOTE(review): the declaration line and one statement after the
 * a_UIcmd_set_page_title() call (listing line 1717) are missing from this
 * listing.
 */
1709{
1710 /* title is only valid inside HEAD */
1711 if (html->InFlags & IN_HEAD && html->Num_TITLE == 1) {
1712 /* Ignore empty titles: <title></title> */
1713 char *title = html->Stash->str;
1714 if (!title || title[0] == '\0')
1715 return;
1716 a_UIcmd_set_page_title(html->bw, title);
1718 }
1719}
1720
1726static void Html_tag_open_script(DilloHtml *html, const char *tag, int tagsize)
1727{
1728 a_Html_stash_init(html);
1729 S_TOP(html)->parse_mode = DILLO_HTML_PARSE_MODE_VERBATIM;
1730}
1731
/*
 * Close handler with an intentionally empty body.
 * NOTE(review): the declaration line of this function is missing from
 * this listing; it follows Html_tag_open_script, so it is presumably the
 * matching close handler -- confirm against the original file.
 */
1736{
1737 /* eventually the stash will be sent to an interpreter for parsing */
1738}
1739
1744static void Html_tag_open_style(DilloHtml *html, const char *tag, int tagsize)
1745{
1746 const char *attrbuf;
1747
1748 html->loadCssFromStash = true;
1749
1750 if (!(attrbuf = a_Html_get_attr(html, tag, tagsize, "type"))) {
1751 if (html->DocType != DT_HTML || html->DocTypeVersion <= 4.01f)
1752 BUG_MSG("<style> requires type attribute.");
1753 } else if (dStrAsciiCasecmp(attrbuf, "text/css")) {
1754 html->loadCssFromStash = false;
1755 }
1756 if ((attrbuf = a_Html_get_attr(html, tag, tagsize, "media")) &&
1757 dStrAsciiCasecmp(attrbuf, "all") && !dStriAsciiStr(attrbuf, "screen")) {
1758 /* HTML 4.01 sec. 6.13 says that media descriptors are case-sensitive,
1759 * but sec. 14.2.3 says that the attribute is case-insensitive.
1760 * TODO can be a comma-separated list.
1761 * TODO handheld.
1762 */
1763 html->loadCssFromStash = false;
1764 }
1765
1766 a_Html_stash_init(html);
1767 S_TOP(html)->parse_mode = DILLO_HTML_PARSE_MODE_VERBATIM;
1768}
1769
/*
 * Close handler that passes the stashed text (html->Stash) to the style
 * engine as author-origin CSS, resolved against html->base_url.
 * NOTE(review): the declaration line and listing line 1775 are missing
 * here; the missing line most likely holds a condition guarding the call
 * below (compare html->loadCssFromStash in Html_tag_open_style) --
 * confirm against the original file.
 */
1774{
1776 html->styleEngine->parse(html, html->base_url, html->Stash->str,
1777 html->Stash->len, CSS_ORIGIN_AUTHOR);
1778}
1779
1780/*
1781 * <BODY>
1782 */
/*
 * Handle an opening <body> tag.
 * Sets IN_BODY, counts the tag, and rejects any <body> after the first.
 * For the first one it reads the presentational attributes (bgcolor,
 * text, link, vlink), applies style, background color and background
 * image to the page, computes html->visited_color, and finally switches
 * the top stack item to body parse mode.
 * NOTE(review): several lines are missing from this listing (1817, 1828,
 * 1867, 1873, 1876 and 1879). They hold the beginning of the calls whose
 * argument lists are still visible below, and the condition that opens
 * the "safe distance" block near the end.
 */
1783static void Html_tag_open_body(DilloHtml *html, const char *tag, int tagsize)
1784{
1785 const char *attrbuf;
1786 int32_t color;
1787 style::Color *bgColor;
1788 style::StyleImage *bgImage;
1789 style::BackgroundRepeat bgRepeat;
1790 style::BackgroundAttachment bgAttachment;
1791 style::Length bgPositionX, bgPositionY;
1792
1793 _MSG("Html_tag_open_body Num_BODY=%d\n", html->Num_BODY);
1794 if (!(html->InFlags & IN_BODY))
1795 html->InFlags |= IN_BODY;
1796 if (html->Num_BODY < UCHAR_MAX)
1797 ++html->Num_BODY;
1798
/* Only the first <body> is processed; later ones are reported and closed. */
1799 if (html->Num_BODY > 1) {
1800 BUG_MSG("<body> was already open.");
1801 html->ReqTagClose = true;
1802 return;
1803 }
1804
1805 if (html->InFlags & IN_HEAD) {
1806 /* if we're here, it's bad XHTML, no need to recover */
1807 BUG_MSG("Unclosed <head>.");
1808 }
1809
/* -1 is passed as default color, so -1 means "no usable color given". */
1810 if ((attrbuf = a_Html_get_attr(html, tag, tagsize, "bgcolor"))) {
1811 color = a_Html_color_parse(html, attrbuf, -1);
1812
1813 if (html->DocType == DT_HTML && html->DocTypeVersion >= 5.0f)
1814 BUG_MSG("<body> bgcolor attribute is obsolete.");
1815
1816 if (color != -1)
1818 CSS_TYPE_COLOR, color);
1819 }
1820
1821 if ((attrbuf = a_Html_get_attr(html, tag, tagsize, "text"))) {
1822 color = a_Html_color_parse(html, attrbuf, -1);
1823
1824 if (html->DocType == DT_HTML && html->DocTypeVersion >= 5.0f)
1825 BUG_MSG("<body> text attribute is obsolete.");
1826
1827 if (color != -1)
1829 CSS_TYPE_COLOR, color);
1830 }
1831
1832 html->restyle ();
1833
1834 if ((attrbuf = a_Html_get_attr(html, tag, tagsize, "link"))) {
1835 html->non_css_link_color = a_Html_color_parse(html, attrbuf, -1);
1836 if (html->DocType == DT_HTML && html->DocTypeVersion >= 5.0f)
1837 BUG_MSG("<body> link attribute is obsolete.");
1838 }
1839
1840 if ((attrbuf = a_Html_get_attr(html, tag, tagsize, "vlink"))) {
1841 html->non_css_visited_color = a_Html_color_parse(html, attrbuf, -1);
1842 if (html->DocType == DT_HTML && html->DocTypeVersion >= 5.0f)
1843 BUG_MSG("<body> vlink attribute is obsolete.");
1844 }
1845
1846 html->dw->setStyle (html->style ());
1847
1848 bgColor = html->styleEngine->backgroundColor ();
1849 if (bgColor)
1850 HT2LT(html)->setBgColor(bgColor);
1851
1852 bgImage = html->styleEngine->backgroundImage (&bgRepeat, &bgAttachment,
1853 &bgPositionX, &bgPositionY);
1854 if (bgImage)
1855 HT2LT(html)->setBgImage(bgImage, bgRepeat, bgAttachment, bgPositionX,
1856 bgPositionY);
1857
1858 /* Determine a color for visited links.
1859 * This color is computed once per page and used for immediate feedback
1860 * when clicking a link.
1861 * On reload style including color for visited links is computed properly
1862 * according to CSS.
1863 */
/* A temporary <a> element is opened just to query its visited style. */
1864 html->startElement (i_A);
1865 html->styleEngine->setPseudoVisited ();
1866 if (html->non_css_visited_color != -1) {
1868 html->non_css_visited_color);
1869 }
1870 html->visited_color = html->style ()->color->getColor ();
1871 html->styleEngine->endElement (i_A);
1872
1874 /* get a color that has a "safe distance" from text, link and bg */
1875 html->visited_color =
1877 html->style ()->color->getColor(),
1878 html->non_css_link_color,
1880 }
1881
1882
1883 S_TOP(html)->parse_mode = DILLO_HTML_PARSE_MODE_BODY;
1884}
1885
1886/*
1887 * BODY
1888 */
/*
 * Close handler for <body>: it only logs html->Num_BODY (through the
 * _MSG macro) and changes no state.
 * NOTE(review): the declaration line of this function is missing from
 * this listing; the name in the log string suggests Html_tag_close_body.
 */
1890{
1891 _MSG("Html_tag_close_body: Num_BODY=%d\n", html->Num_BODY);
1892
1893 /* As some Tag soup pages use multiple BODY tags, this function
1894 * gets called only on EOF and upon an extra BODY open.
1895 * Also, we defer clearing the IN_BODY flag until IN_EOF */
1896}
1897
1898/*
1899 * <P>
1900 * TODO: what's the point between adding the parbreak before and
1901 * after the push?
1902 */
1903static void Html_tag_open_p(DilloHtml *html, const char *tag, int tagsize)
1904{
1905 CssPropertyList props;
1906
1907 a_Html_tag_set_align_attr (html, tag, tagsize);
1908}
1909
1910/*
1911 * <FRAME>, <IFRAME>
1912 * TODO: This is just a temporary fix while real frame support
1913 * isn't finished. Imitates lynx/w3m's frames.
1914 */
/*
 * Open handler for <frame> and <iframe>: turns the 'src' attribute into a
 * URL and marks the element as a visited or unvisited link in the style
 * engine. Tags without 'src', or whose URL cannot be built, are ignored.
 * NOTE(review): listing lines 1927 and 1935 are missing. They hold the
 * condition that selects the "visited" branch and the start of the call
 * that receives Html_set_new_link(html,&url) as an argument.
 * NOTE(review): 'props' is declared but not used in the visible lines.
 */
1915static void Html_tag_open_frame (DilloHtml *html, const char *tag, int tagsize)
1916{
1917 const char *attrbuf;
1918 DilloUrl *url;
1919 CssPropertyList props;
1920
1921 if (!(attrbuf = a_Html_get_attr(html, tag, tagsize, "src")))
1922 return;
1923
1924 if (!(url = a_Html_url_new(html, attrbuf, NULL, 0)))
1925 return;
1926
1928 /* visited frame */
1929 html->styleEngine->setPseudoVisited ();
1930 } else {
1931 /* unvisited frame */
1932 html->styleEngine->setPseudoLink ();
1933 }
1934
1936 Html_set_new_link(html,&url));
1937}
1938
1939static void
1940 Html_tag_content_frame (DilloHtml *html, const char *tag, int tagsize)
1941{
1942 const char *attrbuf;
1943 char *src;
1944 Textblock *textblock;
1945 Widget *bullet;
1946
1947 textblock = HT2TB(html);
1948
1949 if (!(attrbuf = a_Html_get_attr(html, tag, tagsize, "src")))
1950 return;
1951
1952 src = dStrdup(attrbuf);
1953
1954 textblock->addParbreak (5, html->wordStyle ());
1955
1956 bullet = new Bullet();
1957 textblock->addWidget(bullet, html->wordStyle ());
1958 textblock->addSpace(html->wordStyle ());
1959
1960 if (D_ASCII_TOLOWER(tag[1]) == 'i') {
1961 /* IFRAME usually comes with very long advertising/spying URLS,
1962 * to not break rendering we will force name="IFRAME" */
1963 textblock->addText ("IFRAME", html->wordStyle ());
1964
1965 } else {
1966 /* FRAME:
1967 * If 'name' tag is present use it, if not use 'src' value */
1968 if (!(attrbuf = a_Html_get_attr(html, tag, tagsize, "name"))) {
1969 textblock->addText (src, html->wordStyle ());
1970 } else {
1971 textblock->addText (attrbuf, html->wordStyle ());
1972 }
1973 }
1974
1975 textblock->addParbreak (5, html->wordStyle ());
1976
1977 dFree(src);
1978}
1979
1980/*
1981 * <FRAMESET>
1982 * TODO: This is just a temporary fix while real frame support
1983 * isn't finished. Imitates lynx/w3m's frames.
1984 */
/*
 * Placeholder rendering for <frameset>: a paragraph break, the literal
 * text "--FRAME--", and then a new textblock via Html_add_textblock().
 * NOTE(review): the first line of the declaration (return type, function
 * name and first parameter) is missing from this listing.
 */
1986 const char *tag, int tagsize)
1987{
1988 HT2TB(html)->addParbreak (9, html->wordStyle ());
1989 HT2TB(html)->addText("--FRAME--", html->wordStyle ());
1990 Html_add_textblock(html, true, 5, false);
1991}
1992
1993/*
1994 * <H1> | <H2> | <H3> | <H4> | <H5> | <H6>
1995 */
/*
 * Open handler for the heading tags <h1>..<h6>: applies the 'align'
 * attribute, initializes the stash and then sets the parse mode of the
 * top stack item.
 * NOTE(review): listing line 2002, which holds the value assigned to
 * parse_mode, is missing; the assignment below is therefore incomplete.
 */
1996static void Html_tag_open_h(DilloHtml *html, const char *tag, int tagsize)
1997{
1998 a_Html_tag_set_align_attr (html, tag, tagsize);
1999
2000 a_Html_stash_init(html);
2001 S_TOP(html)->parse_mode =
2003}
2004
2005/*
2006 * <BR>
2007 */
2008static void Html_tag_content_br(DilloHtml *html, const char *tag, int tagsize)
2009{
2010 HT2TB(html)->addLinebreak (html->wordStyle ());
2011}
2012
2013/*
2014 * <FONT>
2015 */
/*
 * Open handler for <font>: reads the 'color' and 'face' attributes and
 * hands them on as style hints.
 * NOTE(review): listing lines 2023, 2030 and 2036 are missing. They hold
 * the condition under which html->visited_color replaces the tag's own
 * color, and the start of the two calls whose trailing arguments
 * (CSS_TYPE_COLOR / CSS_TYPE_SYMBOL) are still visible.
 */
2016static void Html_tag_open_font(DilloHtml *html, const char *tag, int tagsize)
2017{
2018 const char *attrbuf;
2019 char *fontFamily = NULL;
2020 int32_t color;
2021
2022 if ((attrbuf = a_Html_get_attr(html, tag, tagsize, "color"))) {
2024 color = html->visited_color;
2025 } else {
2026 /* use the tag-specified color */
2027 color = a_Html_color_parse(html, attrbuf, -1);
2028 }
2029 if (color != -1)
2031 CSS_TYPE_COLOR, color);
2032 }
2033
2034 if ((attrbuf = a_Html_get_attr(html, tag, tagsize, "face"))) {
2035 fontFamily = dStrdup(attrbuf);
2037 CSS_TYPE_SYMBOL, fontFamily);
2038 }
2039
/* fontFamily is still NULL here when there was no 'face' attribute. */
2040 dFree(fontFamily);
2041}
2042
2043/*
2044 * <ABBR>
2045 */
/*
 * Open handler for <abbr>: when tooltips are enabled in the preferences
 * and the tag has a 'title' attribute, that text is handed on.
 * NOTE(review): listing lines 2050 and 2055 are missing; the second one
 * holds the start of the call that takes 'attrbuf' as its last argument.
 */
2046static void Html_tag_open_abbr(DilloHtml *html, const char *tag, int tagsize)
2047{
2048 const char *attrbuf;
2049
2051
2052 if (prefs.show_tooltip &&
2053 (attrbuf = a_Html_get_attr(html, tag, tagsize, "title"))) {
2054
2056 attrbuf);
2057 }
2058}
2059
/*
 * Read the attributes shared by image-like tags: 'title' (only when
 * tooltips are enabled), 'width' and 'height'.
 * Width and height are parsed with a_Html_parse_length(); pixel values
 * are checked against IMAGE_MAX_AREA, and a suspicious request is logged
 * and both attributes are dropped.
 * NOTE(review): listing lines 2067-2068, 2073, 2108-2113 and 2125 are
 * missing. They hold the declarations of l_w and l_h, the start of the
 * tooltip call, the whole body of the "size accepted" branch, and the
 * start of the call that receives html->images->size().
 */
2063void a_Html_common_image_attrs(DilloHtml *html, const char *tag, int tagsize)
2064{
2065 char *width_ptr, *height_ptr;
2066 const char *attrbuf;
2069 int w = 0, h = 0;
2070
2071 if (prefs.show_tooltip &&
2072 (attrbuf = a_Html_get_attr(html, tag, tagsize, "title"))) {
2074 attrbuf);
2075 }
/* Both strings are released with dFree() before returning. */
2076 width_ptr = a_Html_get_attr_wdef(html, tag, tagsize, "width", NULL);
2077 height_ptr = a_Html_get_attr_wdef(html, tag, tagsize, "height", NULL);
2078 // Check for malicious values
2079 // TODO: the same for percentage and relative lengths.
/* Only lengths of type CSS_LENGTH_TYPE_PX are checked; others count as 0. */
2080 if (width_ptr) {
2081 l_w = a_Html_parse_length (html, width_ptr);
2082 w = (int) (CSS_LENGTH_TYPE(l_w) == CSS_LENGTH_TYPE_PX ?
2083 CSS_LENGTH_VALUE(l_w) : 0);
2084 }
2085 if (height_ptr) {
2086 l_h = a_Html_parse_length (html, height_ptr);
2087 h = (int) (CSS_LENGTH_TYPE(l_h) == CSS_LENGTH_TYPE_PX ?
2088 CSS_LENGTH_VALUE(l_h) : 0);
2089 }
2090 /* Check for suspicious image size request that would cause
2091 * an excessive amount of memory to be allocated for the
2092 * image buffer.
2093 * Be careful to avoid integer overflows during the checks.
2094 * There is an additional check in dw/image.cc to catch cases
2095 * where only one dimension is given and the image is scaled
2096 * preserving its original aspect ratio.
2097 * Size requests passed via CSS are also checked there.
2098 */
/* The division form of the area test avoids computing w * h. */
2099 if (w < 0 || h < 0 ||
2100 w > IMAGE_MAX_AREA || h > IMAGE_MAX_AREA ||
2101 (h > 0 && w > IMAGE_MAX_AREA / h)) {
2102 dFree(width_ptr);
2103 dFree(height_ptr);
/* Reset to NULL so the dFree() calls at the end see NULL pointers. */
2104 width_ptr = height_ptr = NULL;
2105 MSG("a_Html_common_image_attrs: suspicious image size request %d x %d\n",
2106 w, h);
2107 } else {
2114 }
2115
2116 /* TODO: we should scale the image respecting its ratio.
2117 * As the image size is not known at this time, maybe a flag
2118 * can be set to scale it later.
2119 if ((width_ptr && !height_ptr) || (height_ptr && !width_ptr))
2120 [...]
2121 */
2122
2123 /* x_img is an index to a list of {url,image} pairs.
2124 * We know a_Html_image_new() will use size() as its next index */
2126 html->images->size());
2127
2128
2129 dFree(width_ptr);
2130 dFree(height_ptr);
2131}
2132
/*
 * Create the image for a tag with a 'src' attribute.
 * Builds the URL, a dw::Image widget carrying the alt text ("[IMG]" when
 * 'alt' is missing or empty) and a DilloImage, appends an entry to
 * html->images, and starts loading right away when allowed.
 * Returns the new DilloImage, or NULL when there is no 'src' or the URL
 * cannot be built.
 * NOTE(review): listing lines 2139, 2155, 2163, 2170 and 2175 are
 * missing. They hold the declarations of 'image' and 'hi', the creation
 * of 'hi', the last operand of the load_now expression, and one statement
 * in the "loaded immediately" branch.
 */
2133DilloImage *a_Html_image_new(DilloHtml *html, const char *tag, int tagsize)
2134{
2135 bool load_now;
2136 char *alt_ptr;
2137 const char *attrbuf;
2138 DilloUrl *url;
2140
2141 if (!(attrbuf = a_Html_get_attr(html, tag, tagsize, "src")) ||
2142 !(url = a_Html_url_new(html, attrbuf, NULL, 0)))
2143 return NULL;
2144
2145 alt_ptr = a_Html_get_attr_wdef(html, tag, tagsize, "alt", NULL);
2146 if (!alt_ptr || !*alt_ptr) {
2147 dFree(alt_ptr);
2148 alt_ptr = dStrdup("[IMG]");
2149 }
2150
2151 dw::Image *dw = new dw::Image(alt_ptr);
2152 image =
2153 a_Image_new(html->dw->getLayout(), (void*)(dw::core::ImgRenderer*)dw, 0, 0);
2154
2156
2157 if (HT2TB(html)->getBgColor())
2158 image->bg_color = HT2TB(html)->getBgColor()->getColor();
2159
2160 if (HT2TB(html)->getFgColor())
2161 image->fg_color = HT2TB(html)->getFgColor()->getColor();
2162
/* Store the {url, image} entry as the new last element of html->images. */
2164 hi->url = url;
2165 html->images->increase();
2166 html->images->set(html->images->size() - 1, hi);
2167
2168 load_now = prefs.load_images ||
2169 !dStrAsciiCasecmp(URL_SCHEME(url), "data") ||
2171
2172 if (load_now && Html_load_image(html->bw, url, html->page_url, image)) {
2173 // hi->image is NULL if dillo tries to load the image immediately
2174 hi->image = NULL;
2176 } else {
2177 // otherwise a reference is kept in html->images
2178 hi->image = image;
2179 }
2180
2181 dFree(alt_ptr);
2182 return image;
2183}
2184
/*
 * Request the data of an image: builds a DilloWeb for 'url' and passes it
 * to a_Capi_open_url(). On success the client key and the URL are
 * registered with the browser window.
 * Returns whether a client key was obtained (ClientKey != 0).
 * NOTE(review): the first line of the declaration and listing line 2196
 * (a statement between the two Web field assignments) are missing from
 * this listing. The caller in a_Html_image_new passes (bw, url,
 * requester, image).
 */
2189 const DilloUrl *requester, DilloImage *Image)
2190{
2191 DilloWeb *Web;
2192 int ClientKey;
2193 /* Fill a Web structure for the cache query */
2194 Web = a_Web_new(bw, url, requester);
2195 Web->Image = Image;
2197 Web->flags |= WEB_Image;
2198 /* Request image data from the cache */
2199 if ((ClientKey = a_Capi_open_url(Web, NULL, NULL)) != 0) {
2200 a_Bw_add_client(bw, ClientKey, 0);
2201 a_Bw_add_url(bw, url);
2202 }
2203 return ClientKey != 0;
2204}
2205
/*
 * Open handler for <img>: processes the common image attributes and then
 * the presentational ones -- 'hspace', 'vspace' and 'border' -- each
 * parsed as a decimal integer with strtol().
 * NOTE(review): the bodies of the three inner "if" blocks are missing
 * from this listing (lines 2217-2221, 2229-2233, 2241-2249 and
 * 2251-2258), so what is done with the parsed values is not visible here.
 */
2206static void Html_tag_open_img(DilloHtml *html, const char *tag, int tagsize)
2207{
2208 int border;
2209 const char *attrbuf;
2210
2211 a_Html_common_image_attrs(html, tag, tagsize);
2212
2213 /* Spacing to the left and right */
2214 if ((attrbuf = a_Html_get_attr(html, tag, tagsize, "hspace"))) {
2215 int space = strtol(attrbuf, NULL, 10);
2216 if (space > 0) {
2222 }
2223 }
2224
2225 /* Spacing at the top and bottom */
2226 if ((attrbuf = a_Html_get_attr(html, tag, tagsize, "vspace"))) {
2227 int space = strtol(attrbuf, NULL, 10);
2228 if (space > 0) {
2234 }
2235 }
2236
2237 /* Border */
/* Unlike the spacing attributes, a border of 0 is accepted. */
2238 if ((attrbuf = a_Html_get_attr(html, tag, tagsize, "border"))) {
2239 border = strtol(attrbuf, NULL, 10);
2240 if (border >= 0) {
2250
2259 }
2260 }
2261
2262}
2263
/*
 * Content handler for <img>: creates the image, adds its widget to the
 * current textblock followed by a break option, and wires up image maps
 * ('ismap' for server-side maps, 'usemap' for client-side ones).
 * Nothing is done when the base URL carries the URL_SpamSafe flag or when
 * a_Html_image_new() returns NULL.
 * NOTE(review): listing line 2271, presumably the declaration of the
 * local 'Image', is missing from this listing.
 */
2269static void Html_tag_content_img(DilloHtml *html, const char *tag, int tagsize)
2270{
2272 DilloUrl *usemap_url;
2273 const char *attrbuf;
2274
2275 /* This avoids loading images. Useful for viewing suspicious HTML email. */
2276 if (URL_FLAGS(html->base_url) & URL_SpamSafe)
2277 return;
2278
2279 Image = a_Html_image_new(html, tag, tagsize);
2280 if (!Image)
2281 return;
2282
2283 usemap_url = NULL;
2284 if ((attrbuf = a_Html_get_attr(html, tag, tagsize, "usemap")))
2285 /* TODO: usemap URLs outside of the document are not used. */
2286 usemap_url = a_Html_url_new(html, attrbuf, NULL, 0);
2287
2288 // At this point, we know that Image->ir represents an image
2289 // widget. Notice that the order of the casts matters, because of
2290 // multiple inheritance.
2291 dw::Image *dwi = (dw::Image*)(dw::core::ImgRenderer*)Image->img_rndr;
2292 HT2TB(html)->addWidget(dwi, html->style());
2293 HT2TB(html)->addBreakOption (html->style (), false);
2294
2295 /* Image maps */
2296 if (a_Html_get_attr(html, tag, tagsize, "ismap")) {
2297 dwi->setIsMap();
2298 _MSG(" Html_tag_open_img: server-side map (ISMAP)\n");
2299 } else if (html->style ()->x_link != -1 &&
2300 usemap_url == NULL) {
2301 /* For simple links, we have to suppress the "image_pressed" signal.
2302 * This is overridden for USEMAP images. */
2303// a_Dw_widget_set_button_sensitive (IM2DW(Image->dw), FALSE);
2304 }
2305
/* The local URL object is released here, after its string was used. */
2306 if (usemap_url) {
2307 dwi->setUseMap(&html->maps, new ::object::String(URL_STR(usemap_url)));
2308 a_Url_free (usemap_url);
2309 }
2310}
2311
2312/*
2313 * <map>
2314 */
2315static void Html_tag_content_map(DilloHtml *html, const char *tag, int tagsize)
2316{
2317 char *hash_name;
2318 const char *attrbuf;
2319 DilloUrl *url;
2320
2321 if (html->InFlags & IN_MAP) {
2322 BUG_MSG("Nested <map>.");
2323 } else {
2324 if ((attrbuf = a_Html_get_attr(html, tag, tagsize, "name"))) {
2325 html->InFlags |= IN_MAP;
2326 hash_name = dStrconcat("#", attrbuf, NULL);
2327 url = a_Html_url_new(html, hash_name, NULL, 0);
2328 html->maps.startNewMap(new ::object::String(URL_STR(url)));
2329 a_Url_free (url);
2330 dFree(hash_name);
2331 } else {
2332 BUG_MSG("<map> requires name attribute.");
2333 }
2334 }
2335}
2336
/*
 * Close handler that walks html->images, asks every image still
 * referenced there to redraw its map, and then clears IN_MAP.
 * NOTE(review): the declaration line and listing line 2355 (the
 * declaration of 'dwi', obtained from 'img') are missing from this
 * listing.
 */
2341{
2342 /* This is a hack for the perhaps frivolous feature of drawing image map
2343 * shapes when there is no image to display. If this map is defined after
2344 * an image that has not been loaded (img != NULL), tell the image to
2345 * redraw. (It will only do so if it uses a map.)
2346 */
2347 for (int i = 0; i < html->images->size(); i++) {
2348 DilloImage *img = html->images->get(i)->image;
2349
2350 if (img) {
2351 // At this point, we know that img->ir represents an image
2352 // widget. (Really? Is this assumption safe?) Notice that the
2353 // order of the casts matters, because of multiple
2354 // inheritance.
2356 dwi->forceMapRedraw();
2357 }
2358 }
2359 html->InFlags &= ~IN_MAP;
2360}
2361
/*
 * Parse a list of decimal integers from 'str' into a vector and return
 * it. Values are expected to be separated by commas; whitespace after a
 * value is skipped, and any other separator is reported but still
 * skipped. Parsing stops at the end of the string or at the first
 * position where strtol() cannot read a number.
 * The caller visible in this file (Html_tag_content_area) deletes the
 * returned vector.
 * NOTE(review): listing lines 2366 and 2371 are missing: the rest of the
 * declaration (the caller passes (html, attrbuf)) and the statement that
 * creates 'coords'.
 */
2365static
2367{
2368 int coord;
2369 const char *tail = str;
2370 char *newtail = NULL;
2372
2373 while (1) {
2374 coord = strtol(tail, &newtail, 10);
/* No digits consumed: nothing more to read. */
2375 if (coord == 0 && newtail == tail)
2376 break;
2377 coords->increase();
2378 coords->set(coords->size() - 1, coord);
2379 while (isspace(*newtail))
2380 newtail++;
2381 if (!*newtail)
2382 break;
2383 if (*newtail != ',') {
2384 BUG_MSG("<area> coords must be integers separated by commas.");
2385 }
/* Skip the separator, whatever character it was. */
2386 tail = newtail + 1;
2387 }
2388
2389 return coords;
2390}
2391
2392/*
2393 * <AREA>
2394 */
2395static void
2396 Html_tag_content_area(DilloHtml *html, const char *tag, int tagsize)
2397{
2398 enum types {UNKNOWN, RECTANGLE, CIRCLE, POLYGON, BACKGROUND};
2399 types type;
2400 misc::SimpleVector<int> *coords = NULL;
2401 DilloUrl* url;
2402 const char *attrbuf;
2403 int link = -1;
2404 Shape *shape = NULL;
2405
2406 if (!(html->InFlags & IN_MAP)) {
2407 BUG_MSG("<area> not inside <map>.");
2408 return;
2409 }
2410 attrbuf = a_Html_get_attr(html, tag, tagsize, "shape");
2411
2412 if (!attrbuf || !*attrbuf || !dStrAsciiCasecmp(attrbuf, "rect")) {
2413 /* the default shape is a rectangle */
2414 type = RECTANGLE;
2415 } else if (dStrAsciiCasecmp(attrbuf, "default") == 0) {
2416 /* "default" is the background */
2417 type = BACKGROUND;
2418 } else if (dStrAsciiCasecmp(attrbuf, "circle") == 0) {
2419 type = CIRCLE;
2420 } else if (dStrnAsciiCasecmp(attrbuf, "poly", 4) == 0) {
2421 type = POLYGON;
2422 } else {
2423 BUG_MSG("<area> unknown shape: '%s'.", attrbuf);
2424 type = UNKNOWN;
2425 }
2426 if (type == RECTANGLE || type == CIRCLE || type == POLYGON) {
2427 if ((attrbuf = a_Html_get_attr(html, tag, tagsize, "coords"))) {
2428 coords = Html_read_coords(html, attrbuf);
2429
2430 if (type == RECTANGLE) {
2431 if (coords->size() != 4)
2432 BUG_MSG("<area> rectangle must have four coordinate values.");
2433 if (coords->size() >= 4)
2434 shape = new Rectangle(coords->get(0),
2435 coords->get(1),
2436 coords->get(2) - coords->get(0),
2437 coords->get(3) - coords->get(1));
2438 } else if (type == CIRCLE) {
2439 if (coords->size() != 3)
2440 BUG_MSG("<area> circle must have three coordinate values.");
2441 if (coords->size() >= 3)
2442 shape = new Circle(coords->get(0), coords->get(1),
2443 coords->get(2));
2444 } else if (type == POLYGON) {
2445 Polygon *poly;
2446 int i;
2447 if (coords->size() % 2)
2448 BUG_MSG("<area> polygon with odd number of coordinates.");
2449 shape = poly = new Polygon();
2450 for (i = 0; i < (coords->size() / 2); i++)
2451 poly->addPoint(coords->get(2*i), coords->get(2*i + 1));
2452 }
2453 delete(coords);
2454 }
2455 }
2456 if (shape != NULL || type == BACKGROUND) {
2457 if ((attrbuf = a_Html_get_attr(html, tag, tagsize, "href"))) {
2458 url = a_Html_url_new(html, attrbuf, NULL, 0);
2459 dReturn_if_fail ( url != NULL );
2460
2461 link = Html_set_new_link(html, &url);
2462 }
2463 if (type == BACKGROUND)
2464 html->maps.setCurrentMapDefaultLink(link);
2465 else
2466 html->maps.addShapeToCurrentMap(shape, link);
2467 }
2468}
2469
2470/*
2471 * <OBJECT>
2472 * Simply provide a link if the object is something downloadable.
2473 */
/*
 * Open handler for <object>: when the tag has a 'data' attribute, its
 * URL (resolved against 'codebase' when that attribute is present) is
 * turned into a link, marked as visited or unvisited in the style engine.
 * NOTE(review): listing lines 2488, 2494 and 2497 are missing. They hold
 * the condition of the visited/unvisited choice, the start of the call
 * that receives Html_set_new_link(html, &url), and one statement before
 * the closing brace. 'base_url' is not released in the visible lines;
 * the missing line 2497 may do that -- confirm against the original file.
 */
2474static void Html_tag_open_object(DilloHtml *html, const char *tag, int tagsize)
2475{
2476 DilloUrl *url, *base_url = NULL;
2477 const char *attrbuf;
2478
2479 if ((attrbuf = a_Html_get_attr(html, tag, tagsize, "codebase"))) {
2480 base_url = a_Html_url_new(html, attrbuf, NULL, 0);
2481 }
2482
2483 if ((attrbuf = a_Html_get_attr(html, tag, tagsize, "data"))) {
2484 url = a_Html_url_new(html, attrbuf,
2485 URL_STR(base_url), (base_url != NULL));
2486 dReturn_if_fail ( url != NULL );
2487
2489 html->styleEngine->setPseudoVisited ();
2490 } else {
2491 html->styleEngine->setPseudoLink ();
2492 }
2493
2495 Html_set_new_link(html, &url));
2496 }
2498}
2499
2500static void Html_tag_content_object(DilloHtml *html, const char *tag,
2501 int tagsize)
2502{
2503 if (a_Html_get_attr(html, tag, tagsize, "data"))
2504 HT2TB(html)->addText("[OBJECT]", html->wordStyle ());
2505}
2506
2507/*
2508 * <VIDEO>
2509 * Provide a link to the video.
2510 */
/*
 * Open handler for <video>: when the tag has a 'src' attribute, a link to
 * it is created and the text "[VIDEO]" is added. IN_MEDIA is set at the
 * end of the function. A <video> found while IN_MEDIA is already set is
 * only logged.
 * NOTE(review): listing lines 2526 and 2532 are missing. They hold the
 * condition of the visited/unvisited choice and the start of the call
 * that receives Html_set_new_link(html, &url).
 */
2511static void Html_tag_open_video(DilloHtml *html, const char *tag, int tagsize)
2512{
2513 DilloUrl *url;
2514 const char *attrbuf;
2515
2516 if (html->InFlags & IN_MEDIA) {
2517 MSG("<video> not handled when already inside a media element.\n");
2518 return;
2519 }
2520 /* TODO: poster attr */
2521
2522 if ((attrbuf = a_Html_get_attr(html, tag, tagsize, "src"))) {
2523 url = a_Html_url_new(html, attrbuf, NULL, 0);
/* Returns early, leaving IN_MEDIA unset, when the URL cannot be built. */
2524 dReturn_if_fail ( url != NULL );
2525
2527 html->styleEngine->setPseudoVisited ();
2528 } else {
2529 html->styleEngine->setPseudoLink ();
2530 }
2531
2533 Html_set_new_link(html, &url));
2534
2535 HT2TB(html)->addText("[VIDEO]", html->wordStyle ());
2536 }
2537 html->InFlags |= IN_MEDIA;
2538}
2539
2540/*
2541 * <AUDIO>
2542 * Provide a link to the audio.
2543 */
/*
 * Open handler for <audio>: when the tag has a 'src' attribute, a link to
 * it is created and the text "[AUDIO]" is added. IN_MEDIA is set at the
 * end of the function. An <audio> found while IN_MEDIA is already set is
 * only logged.
 * NOTE(review): listing lines 2558 and 2564 are missing. They hold the
 * condition of the visited/unvisited choice and the start of the call
 * that receives Html_set_new_link(html, &url).
 */
2544static void Html_tag_open_audio(DilloHtml *html, const char *tag, int tagsize)
2545{
2546 DilloUrl *url;
2547 const char *attrbuf;
2548
2549 if (html->InFlags & IN_MEDIA) {
2550 MSG("<audio> not handled when already inside a media element.\n");
2551 return;
2552 }
2553
2554 if ((attrbuf = a_Html_get_attr(html, tag, tagsize, "src"))) {
2555 url = a_Html_url_new(html, attrbuf, NULL, 0);
/* Returns early, leaving IN_MEDIA unset, when the URL cannot be built. */
2556 dReturn_if_fail ( url != NULL );
2557
2559 html->styleEngine->setPseudoVisited ();
2560 } else {
2561 html->styleEngine->setPseudoLink ();
2562 }
2563
2565 Html_set_new_link(html, &url));
2566
2567 HT2TB(html)->addText("[AUDIO]", html->wordStyle ());
2568 }
2569 html->InFlags |= IN_MEDIA;
2570}
2571
2572/*
2573 * <SOURCE>
2574 * Media resource; provide a link to its address.
2575 */
/*
 * Open handler for <source>: inside a media element (IN_MEDIA set) the
 * 'src' attribute is turned into a link. Outside a media element the tag
 * is silently ignored; inside one, a missing 'src' is reported.
 * NOTE(review): listing lines 2594 and 2599 are missing. They hold the
 * condition of the visited/unvisited choice and the start of the call
 * that receives Html_set_new_link(html, &url).
 */
2576static void Html_tag_open_source(DilloHtml *html, const char *tag,
2577 int tagsize)
2578{
2579 const char *attrbuf;
2580
2581 if (!(html->InFlags & IN_MEDIA)) {
2582 // Can also be inside a picture element.
2583 // BUG_MSG("<source> not inside a media element.");
2584 return;
2585 }
2586 if (!(attrbuf = a_Html_get_attr(html, tag, tagsize, "src"))) {
2587 BUG_MSG("<source> requires src attribute.");
2588 return;
2589 } else {
2590 DilloUrl *url = a_Html_url_new(html, attrbuf, NULL, 0);
2591
2592 dReturn_if_fail ( url != NULL );
2593
2595 html->styleEngine->setPseudoVisited ();
2596 } else {
2597 html->styleEngine->setPseudoLink ();
2598 }
2600 Html_set_new_link(html, &url));
2601 }
2602}
2603
2604static void Html_tag_content_source(DilloHtml *html, const char *tag,
2605 int tagsize)
2606{
2607 if ((html->InFlags & IN_MEDIA) && a_Html_get_attr(html, tag, tagsize,"src"))
2608 HT2TB(html)->addText("[MEDIA SOURCE]", html->wordStyle ());
2609}
2610
/*
 * Close handler that clears the IN_MEDIA flag, i.e. the flag set by the
 * <video> and <audio> open handlers.
 * NOTE(review): the declaration line of this function is missing from
 * this listing.
 */
2615{
2616 html->InFlags &= ~IN_MEDIA;
2617}
2618
2619/*
2620 * <EMBED>
2621 * Provide a link to embedded content.
2622 */
/*
 * Open handler for <embed>: when the tag has a 'src' attribute, its URL
 * is turned into a link, marked as visited or unvisited in the style
 * engine.
 * NOTE(review): listing lines 2632 and 2638 are missing. They hold the
 * condition of the visited/unvisited choice and the start of the call
 * that receives Html_set_new_link(html, &url).
 */
2623static void Html_tag_open_embed(DilloHtml *html, const char *tag, int tagsize)
2624{
2625 const char *attrbuf;
2626
2627 if ((attrbuf = a_Html_get_attr(html, tag, tagsize, "src"))) {
2628 DilloUrl *url = a_Html_url_new(html, attrbuf, NULL, 0);
2629
2630 dReturn_if_fail ( url != NULL );
2631
2633 html->styleEngine->setPseudoVisited ();
2634 } else {
2635 html->styleEngine->setPseudoLink ();
2636 }
2637
2639 Html_set_new_link(html, &url));
2640 }
2641}
2642
2643static void Html_tag_content_embed(DilloHtml *html,const char *tag,int tagsize)
2644{
2645 if (a_Html_get_attr(html, tag, tagsize, "src"))
2646 HT2TB(html)->addText("[EMBED]", html->wordStyle ());
2647}
2648
2652static const char* Html_get_javascript_link(DilloHtml *html)
2653{
2654 size_t i;
2655 char ch, *p1, *p2;
2656 Dstr *Buf = html->attr_data;
2657
2658 if (dStrnAsciiCasecmp("javascript", Buf->str, 10) == 0) {
2659 i = strcspn(Buf->str, "'\"");
2660 ch = Buf->str[i];
2661 if ((ch == '"' || ch == '\'') &&
2662 (p2 = strchr(Buf->str + i + 1 , ch))) {
2663 p1 = Buf->str + i;
2664 BUG_MSG("Link depends on javascript().");
2665 dStr_truncate(Buf, p2 - Buf->str);
2666 dStr_erase(Buf, 0, p1 - Buf->str + 1);
2667 }
2668 }
2669 return Buf->str;
2670}
2671
2675static void Html_add_anchor(DilloHtml *html, const char *name)
2676{
2677 _MSG("Registering ANCHOR: %s\n", name);
2678 if (!HT2TB(html)->addAnchor (name, html->style ()))
2679 BUG_MSG("Anchor names must be unique within the document (\"%s\").",
2680 name);
2681 /*
2682 * According to Sec. 12.2.1 of the HTML 4.01 spec, "anchor names that
2683 * differ only in case may not appear in the same document", but
2684 * "comparisons between fragment identifiers and anchor names must be
2685 * done by exact (case-sensitive) match." We ignore the case issue and
2686 * always test for exact matches. Moreover, what does uppercase mean
2687 * for Unicode characters outside the ASCII range?
2688 */
2689}
2690
2691/*
2692 * <A>
2693 */
2694static void Html_tag_open_a(DilloHtml *html, const char *tag, int tagsize)
2695{
2696 DilloUrl *url;
2697 const char *attrbuf;
2698
2699 /* TODO: add support for MAP with A HREF */
2700 html->InFlags |= IN_A;
2701 if (html->InFlags & IN_MAP)
2702 Html_tag_content_area(html, tag, tagsize);
2703
2704 if ((attrbuf = a_Html_get_attr(html, tag, tagsize, "href"))) {
2705 /* if it's a javascript link, extract the reference. */
2706 if (D_ASCII_TOLOWER(attrbuf[0]) == 'j')
2707 attrbuf = Html_get_javascript_link(html);
2708
2709 url = a_Html_url_new(html, attrbuf, NULL, 0);
2710 dReturn_if_fail ( url != NULL );
2711
2713 html->InVisitedLink = true;
2714 html->styleEngine->setPseudoVisited ();
2715 if (html->non_css_visited_color != -1)
2718 html->non_css_visited_color);
2719 } else {
2720 html->styleEngine->setPseudoLink ();
2721 if (html->non_css_link_color != -1)
2724 html->non_css_link_color);
2725 }
2726
2728 Html_set_new_link(html, &url));
2729 }
2730 if (prefs.show_tooltip &&
2731 (attrbuf = a_Html_get_attr(html, tag, tagsize, "title"))) {
2733 attrbuf);
2734 }
2735
2737
2738 if ((attrbuf = a_Html_get_attr(html, tag, tagsize, "name"))) {
2739 char *nameVal;
2740 const char *id = html->styleEngine->getId ();
2741
2743 Html_check_name_val(html, attrbuf, "name");
2744
2745 nameVal = a_Url_decode_hex_str(attrbuf);
2746
2747 if (nameVal) {
2748 /* We compare the "id" value with the url-decoded "name" value */
2749 if (!id || strcmp(nameVal, id)) {
2750 if (id)
2751 BUG_MSG("In <a>, id ('%s') and name ('%s') attributes differ.",
2752 id, nameVal);
2753 Html_add_anchor(html, nameVal);
2754 }
2755
2756 dFree(nameVal);
2757 }
2758 }
2759}
2760
2761/*
2762 * <A> close function
2763 */
2764static void Html_tag_close_a(DilloHtml *html)
2765{
2766 html->InFlags &= ~IN_A;
2767 html->InVisitedLink = false;
2768}
2769
2770/*
2771 * <BLOCKQUOTE>
2772 */
2774 const char *tag, int tagsize)
2775{
2776 Html_add_textblock(html, true, 9, false);
2777}
2778
2779/*
2780 * <Q>
2781 */
2782static void Html_tag_open_q(DilloHtml *html, const char *tag, int tagsize)
2783{
2784 /*
2785 * Left Double Quotation Mark, which is wrong in many cases, but
2786 * should at least be widely recognized.
2787 */
2788 const char *U201C = "\xe2\x80\x9c";
2789
2791 HT2TB(html)->addText (U201C, html->wordStyle ());
2792}
2793
2794/*
2795 * </Q>
2796 */
2797static void Html_tag_close_q(DilloHtml *html)
2798{
2799 /* Right Double Quotation Mark */
2800 const char *U201D = "\xe2\x80\x9d";
2801
2802 HT2TB(html)->addText (U201D, html->wordStyle ());
2803}
2804
2805/*
2806 * Handle the <UL> tag.
2807 */
2808static void Html_tag_open_ul(DilloHtml *html, const char *tag, int tagsize)
2809{
2810 const char *attrbuf;
2811 ListStyleType list_style_type;
2812
2813 if ((attrbuf = a_Html_get_attr(html, tag, tagsize, "type"))) {
2814
2815 /* list_style_type explicitly defined */
2816 if (dStrAsciiCasecmp(attrbuf, "disc") == 0)
2817 list_style_type = LIST_STYLE_TYPE_DISC;
2818 else if (dStrAsciiCasecmp(attrbuf, "circle") == 0)
2819 list_style_type = LIST_STYLE_TYPE_CIRCLE;
2820 else if (dStrAsciiCasecmp(attrbuf, "square") == 0)
2821 list_style_type = LIST_STYLE_TYPE_SQUARE;
2822 else
2823 /* invalid value */
2824 list_style_type = LIST_STYLE_TYPE_DISC;
2825
2827 CSS_TYPE_ENUM, list_style_type);
2828 if (html->DocType == DT_HTML && html->DocTypeVersion >= 5.0f)
2829 BUG_MSG("<ul> type attribute is obsolete.");
2830 }
2831
2832 S_TOP(html)->list_type = HTML_LIST_UNORDERED;
2833 S_TOP(html)->list_number = 0;
2834 S_TOP(html)->ref_list_item = NULL;
2835}
2836
2837/*
2838 * Handle the <DIR> or <MENU> tag.
2839 * (Deprecated and almost the same as <UL>)
2840 */
2841static void Html_tag_open_dir(DilloHtml *html, const char *tag, int tagsize)
2842{
2844 HT2TB(html)->addParbreak (9, html->wordStyle ());
2845
2846 S_TOP(html)->list_type = HTML_LIST_UNORDERED;
2847 S_TOP(html)->list_number = 0;
2848 S_TOP(html)->ref_list_item = NULL;
2849
2851 BUG_MSG("Obsolete list type; use <ul> instead.");
2852}
2853
2854/*
2855 * Handle the <MENU> tag.
2856 */
2857static void Html_tag_open_menu(DilloHtml *html, const char *tag, int tagsize)
2858{
2859 /* In another bit of ridiculous mess from the HTML5 world, the menu
2860 * element, which was deprecated in HTML4:
2861 * - does not appear at all in W3C's HTML5 spec
2862 * - appears in WHATWG's HTML5 doc and the W3C's 5.1 draft, where it
2863 * means something totally different than it did in the old days
2864 * (now it's for popup menus and toolbar menus rather than being a
2865 * sort of list).
2866 */
2867 if (!(html->DocType == DT_HTML && html->DocTypeVersion >= 5.0f))
2868 Html_tag_open_dir(html, tag, tagsize);
2869}
2870
2871/*
2872 * Handle the <OL> tag.
2873 */
2874static void Html_tag_open_ol(DilloHtml *html, const char *tag, int tagsize)
2875{
2876 const char *attrbuf;
2877 int n = 1;
2878
2879 if ((attrbuf = a_Html_get_attr(html, tag, tagsize, "type"))) {
2880 ListStyleType listStyleType = LIST_STYLE_TYPE_DECIMAL;
2881
2882 if (*attrbuf == '1')
2883 listStyleType = LIST_STYLE_TYPE_DECIMAL;
2884 else if (*attrbuf == 'a')
2885 listStyleType = LIST_STYLE_TYPE_LOWER_ALPHA;
2886 else if (*attrbuf == 'A')
2887 listStyleType = LIST_STYLE_TYPE_UPPER_ALPHA;
2888 else if (*attrbuf == 'i')
2889 listStyleType = LIST_STYLE_TYPE_LOWER_ROMAN;
2890 else if (*attrbuf == 'I')
2891 listStyleType = LIST_STYLE_TYPE_UPPER_ROMAN;
2892
2894 CSS_TYPE_ENUM, listStyleType);
2895 }
2896
2897 S_TOP(html)->list_type = HTML_LIST_ORDERED;
2898
2899 if ((attrbuf = a_Html_get_attr(html, tag, tagsize, "start")) &&
2900 (n = (int) strtol(attrbuf, NULL, 10)) < 0) {
2901 BUG_MSG("Illegal '-' character in START attribute; Starting from 0.");
2902 n = 0;
2903 }
2904 S_TOP(html)->list_number = n;
2905 S_TOP(html)->ref_list_item = NULL;
2906}
2907
2908/*
2909 * Handle the <LI> tag.
2910 */
2911static void Html_tag_open_li(DilloHtml *html, const char *tag, int tagsize)
2912{
2913 Style *style = html->style ();
2914 int *list_number;
2915 const char *attrbuf;
2916
2917 if (S_TOP(html)->list_type == HTML_LIST_NONE &&
2918 !(html->DocType == DT_HTML && html->DocTypeVersion >= 5.0f)) {
2919 /* In WHATWG's HTML5 and W3C's HTML 5.1, LI can appear within MENUs
2920 * of the toolbar type.
2921 */
2922 BUG_MSG("<li> outside <ul> or <ol>.");
2923 }
2924
2925 html->InFlags |= IN_LI;
2926
2927 /* Get our parent tag's variables (used as state storage) */
2928 list_number = &html->stack->getRef(html->stack->size()-2)->list_number;
2929
2931 // ordered
2932 if ((attrbuf = a_Html_get_attr(html, tag, tagsize, "value")) &&
2933 (*list_number = strtol(attrbuf, NULL, 10)) < 0) {
2934 BUG_MSG("Illegal negative list value attribute; Starting from 0.");
2935 *list_number = 0;
2936 }
2937 }
2938}
2939
2940/*
2941 * Close <LI>.
2942 */
2944{
2945 html->InFlags &= ~IN_LI;
2946 ((ListItem *)html->dw)->flush ();
2947}
2948
2949/*
2950 * <HR>
2951 */
2952static void Html_tag_open_hr(DilloHtml *html, const char *tag, int tagsize)
2953{
2954 char *width_ptr;
2955 const char *attrbuf;
2956 int32_t size = 0;
2957
2958 width_ptr = a_Html_get_attr_wdef(html, tag, tagsize, "width", NULL);
2959 if (width_ptr) {
2960 if (html->DocType == DT_HTML && html->DocTypeVersion >= 5.0f)
2961 BUG_MSG("<hr> width attribute is obsolete.");
2964 a_Html_parse_length (html, width_ptr));
2965 dFree(width_ptr);
2966 }
2967
2968 if ((attrbuf = a_Html_get_attr(html, tag, tagsize, "size"))) {
2969 size = strtol(attrbuf, NULL, 10);
2970 if (html->DocType == DT_HTML && html->DocTypeVersion >= 5.0f)
2971 BUG_MSG("<hr> size attribute is obsolete.");
2972 }
2973
2974 a_Html_tag_set_align_attr(html, tag, tagsize);
2975
2976 /* TODO: evaluate attribute */
2977 if (a_Html_get_attr(html, tag, tagsize, "noshade")) {
2978 if (html->DocType == DT_HTML && html->DocTypeVersion >= 5.0f)
2979 BUG_MSG("<hr> noshade attribute is obsolete.");
2988
2989 if (size <= 0)
2990 size = 1;
2991 }
2992
2993 if (size > 0) {
2994 CssLength size_top = CSS_CREATE_LENGTH ((size+1)/2, CSS_LENGTH_TYPE_PX);
2995 CssLength size_bottom = CSS_CREATE_LENGTH (size / 2, CSS_LENGTH_TYPE_PX);
2997 CSS_TYPE_LENGTH_PERCENTAGE, size_top);
2999 CSS_TYPE_LENGTH_PERCENTAGE, size_top);
3002 size_bottom);
3005 size_bottom);
3006 }
3007
3008}
3009
3010static void Html_tag_content_hr(DilloHtml *html, const char *tag, int tagsize)
3011{
3012 Widget *hruler;
3013 HT2TB(html)->addParbreak (5, html->wordStyle ());
3014
3015 hruler = new Ruler();
3016 hruler->setStyle (html->style ());
3017 HT2TB(html)->addWidget (hruler, html->style ());
3018 HT2TB(html)->addParbreak (5, html->wordStyle ());
3019}
3020
3021/*
3022 * <DL>
3023 */
3024static void Html_tag_open_dl(DilloHtml *html, const char *tag, int tagsize)
3025{
3026 /* may want to actually do some stuff here. */
3028 HT2TB(html)->addParbreak (9, html->wordStyle ());
3029}
3030
3031/*
3032 * <DT>
3033 */
3034static void Html_tag_open_dt(DilloHtml *html, const char *tag, int tagsize)
3035{
3037 HT2TB(html)->addParbreak (9, html->wordStyle ());
3038}
3039
3040/*
3041 * <DD>
3042 */
3043static void Html_tag_open_dd(DilloHtml *html, const char *tag, int tagsize)
3044{
3045 Html_add_textblock(html, true, 9, false);
3046}
3047
3048/*
3049 * <PRE>
3050 */
3051static void Html_tag_open_pre(DilloHtml *html, const char *tag, int tagsize)
3052{
3054 HT2TB(html)->addParbreak (9, html->wordStyle ());
3055
3056 html->InFlags |= IN_PRE;
3057}
3058
3059/*
3060 * Custom close for <PRE>
3061 */
3063{
3064 html->InFlags &= ~IN_PRE;
3065}
3066
3071static int Html_tag_pre_excludes(DilloHtml *html, int tag_idx)
3072{
3073 if (!(html->DocType == DT_HTML && html->DocTypeVersion >= 5.0f)) {
3074 /* HTML5 doesn't say anything about excluding elements */
3075 const char *es_set[] = {"img", "object", "applet", "big", "small", "sub",
3076 "sup", "font", "basefont", NULL};
3077 static int ei_set[10], i;
3078
3079 /* initialize array */
3080 if (!ei_set[0])
3081 for (i = 0; es_set[i]; ++i)
3082 ei_set[i] = a_Html_tag_index(es_set[i]);
3083
3084 for (i = 0; ei_set[i]; ++i)
3085 if (tag_idx == ei_set[i])
3086 return 1;
3087 }
3088 return 0;
3089}
3090
3094static void Html_update_content_type(DilloHtml *html, const char *content)
3095{
3096 const char *new_content = a_Capi_set_content_type(html->page_url, content,
3097 "meta");
3098 /* Cannot ask cache whether the content type was changed, as
3099 * this code in another bw might have already changed it for us.
3100 */
3101 if (a_Misc_content_type_cmp(html->content_type, new_content)) {
3102 html->stop_parser = true; /* The cache buffer is no longer valid */
3103 a_UIcmd_repush(html->bw);
3104 }
3105}
3106
3120static void Html_tag_open_meta(DilloHtml *html, const char *tag, int tagsize)
3121{
3122 const char meta_template[] =
3123"<table width='100%%'><tr><td bgcolor='#ee0000'>Warning:</td>\n"
3124" <td bgcolor='#8899aa' width='100%%'>\n"
3125" This page uses the NON-STANDARD meta refresh tag.<br> The HTML 4.01 SPEC\n"
3126" (sec 7.4.4) recommends explicitly to avoid it.</td></tr>\n"
3127" <tr><td bgcolor='#a0a0a0' colspan='2'>The author wanted you to go\n"
3128" <a href='%s'>here</a>%s</td></tr></table><br>\n";
3129
3130 const char *p, *equiv, *charset, *content;
3131 char delay_str[64], *mr_url;
3132 DilloUrl *new_url;
3133 int delay;
3134
3135 /* only valid inside HEAD */
3136 if (!(html->InFlags & IN_HEAD)) {
3137 if (!((html->DocType == DT_HTML && html->DocTypeVersion >= 5.0f) &&
3138 a_Html_get_attr(html, tag, tagsize, "itemprop"))) {
3139 /* With the HTML 5.1 draft spec, meta with itemprop may appear
3140 * in the body.
3141 */
3142 BUG_MSG("This <meta> element must be inside the HEAD section.");
3143 }
3144 return;
3145 }
3146
3147 if ((equiv = a_Html_get_attr(html, tag, tagsize, "http-equiv"))) {
3148 if (!dStrAsciiCasecmp(equiv, "refresh") &&
3149 (content = a_Html_get_attr(html, tag, tagsize, "content"))) {
3150
3151 /* Get delay, if present, and make a message with it */
3152 if ((delay = strtol(content, NULL, 0))) {
3153 snprintf(delay_str, 64, " after %d second%s.",
3154 delay, (delay > 1) ? "s" : "");
3155 } else {
3156 sprintf(delay_str, ".");
3157 }
3158 /* Skip to anything after "URL=" or ";" if "URL=" is not found */
3159 int has_url = 1;
3160 if ((p = dStriAsciiStr(content, "url=")))
3161 content = p + strlen("url=");
3162 else if ((p = strstr(content, ";")))
3163 content = p + strlen(";");
3164 else
3165 has_url = 0;
3166
3167 if (has_url) {
3168 /* Handle the case of a quoted URL */
3169 if (*content == '"' || *content == '\'') {
3170 if ((p = strchr(content + 1, *content)))
3171 mr_url = dStrndup(content + 1, p - content - 1);
3172 else
3173 mr_url = dStrdup(content + 1);
3174 } else {
3175 mr_url = dStrdup(content);
3176 }
3177 } else {
3178 mr_url = dStrdup("");
3179 }
3180
3181 new_url = a_Html_url_new(html, mr_url, NULL, 0);
3182
3183 if (a_Url_cmp(html->base_url, new_url) == 0) {
3184 /* redirection loop, or empty url string: ignore */
3185 BUG_MSG("<meta> refresh: %s.",
3186 *mr_url ? "redirection loop" : "no target URL");
3187 } else if (delay == 0) {
3188 /* zero-delay redirection */
3189 html->stop_parser = true;
3190 if (URL_FLAGS(html->base_url) & URL_SpamSafe) {
3191 a_UIcmd_set_msg(html->bw,
3192 "WARNING: local URL with META refresh. Aborting.");
3193 } else if (a_Capi_dpi_verify_request(html->bw, new_url)) {
3194 a_UIcmd_redirection0((void*)html->bw, new_url);
3195 }
3196 } else {
3197 /* Send a custom HTML message.
3198 * TODO: This is a hairy hack,
3199 * It'd be much better to build a widget. */
3200 Dstr *ds_msg = dStr_sized_new(256);
3201 dStr_sprintf(ds_msg, meta_template, URL_STR(new_url), delay_str);
3202 {
3203 int o_InFlags = html->InFlags;
3204 int o_TagSoup = html->TagSoup;
3205 html->InFlags = IN_BODY + IN_META_HACK;
3206 html->TagSoup = false;
3207 Html_write_raw(html, ds_msg->str, ds_msg->len, 0);
3208 html->TagSoup = o_TagSoup;
3209 html->InFlags = o_InFlags;
3210 }
3211 dStr_free(ds_msg, 1);
3212 }
3213 a_Url_free(new_url);
3214 dFree(mr_url);
3215
3216 } else if (!dStrAsciiCasecmp(equiv, "content-type") &&
3217 (content = a_Html_get_attr(html, tag, tagsize, "content"))) {
3218 _MSG("Html_tag_open_meta: content={%s}\n", content);
3219 Html_update_content_type(html, content);
3220 }
3221 } else if (html->DocType == DT_HTML && html->DocTypeVersion == 5.0f &&
3222 (charset = a_Html_get_attr(html, tag, tagsize, "charset"))) {
3223 char *content = dStrconcat("text/html; charset=", charset, NULL);
3224
3225 Html_update_content_type(html, content);
3226 dFree(content);
3227 }
3228}
3229
3233static void Html_css_load_callback(int Op, CacheClient_t *Client)
3234{
3235 _MSG("Html_css_load_callback: Op=%d\n", Op);
3236 if (Op) { /* EOF */
3237 BrowserWindow *bw = ((DilloWeb *)Client->Web)->bw;
3238 /* Repush when we've got them all */
3239 if (--bw->NumPendingStyleSheets == 0)
3241 }
3242}
3243
3248{
3249 char *data;
3250 int len;
3251
3252 dReturn_if (url == NULL || ! prefs.load_stylesheets);
3253
3254 _MSG("Html_load_stylesheet: ");
3256 a_Capi_get_buf(url, &data, &len)) {
3257 _MSG("cached URL=%s len=%d", URL_STR(url), len);
3258 if (strncmp("@charset \"", data, 10) == 0) {
3259 char *endq = strchr(data+10, '"');
3260
3261 if (endq && (endq - data <= 51)) {
3262 /* IANA limits charset names to 40 characters */
3263 char *content_type;
3264
3265 *endq = '\0';
3266 content_type = dStrconcat("text/css; charset=", data+10, NULL);
3267 *endq = '"';
3268 a_Capi_unref_buf(url);
3271 a_Capi_get_buf(url, &data, &len);
3272 }
3273 }
3274 html->styleEngine->parse(html, url, data, len, CSS_ORIGIN_AUTHOR);
3275 a_Capi_unref_buf(url);
3276 } else {
3277 /* Fill a Web structure for the cache query */
3278 int ClientKey;
3279 DilloWeb *Web = a_Web_new(html->bw, url, html->page_url);
3280 Web->flags |= WEB_Stylesheet;
3281 if ((ClientKey = a_Capi_open_url(Web, Html_css_load_callback, NULL))) {
3282 ++html->bw->NumPendingStyleSheets;
3283 a_Bw_add_client(html->bw, ClientKey, 0);
3284 a_Bw_add_url(html->bw, url);
3285 MSG("NumPendingStyleSheets=%d\n", html->bw->NumPendingStyleSheets);
3286 }
3287 }
3288 _MSG("\n");
3289}
3290
3299static void Html_tag_open_link(DilloHtml *html, const char *tag, int tagsize)
3300{
3301 DilloUrl *url;
3302 const char *attrbuf;
3303
3304 //char *tag_str = dStrndup(tag, tagsize);
3305 //MSG("Html_tag_open_link(): %s\n", tag_str);
3306 //dFree(tag_str);
3307
3308 /* When viewing suspicious HTML email, don't load LINK */
3310
3311 /* Ignore LINK outside HEAD */
3312 if (!(html->InFlags & IN_HEAD)) {
3313 if (!((html->DocType == DT_HTML && html->DocTypeVersion >= 5.0f) &&
3314 a_Html_get_attr(html, tag, tagsize, "itemprop"))) {
3315 /* With the HTML 5.1 draft spec, link with itemprop may appear
3316 * in the body.
3317 */
3318 BUG_MSG("This <link> element must be inside the HEAD section.");
3319 }
3320 return;
3321 }
3322 /* Remote stylesheets enabled? */
3324 /* CSS stylesheet link */
3325 if (!(attrbuf = a_Html_get_attr(html, tag, tagsize, "rel")) ||
3326 dStrAsciiCasecmp(attrbuf, "stylesheet"))
3327 return;
3328
3329 /* IMPLIED attributes? */
3330 if (((attrbuf = a_Html_get_attr(html, tag, tagsize, "type")) &&
3331 dStrAsciiCasecmp(attrbuf, "text/css")) ||
3332 ((attrbuf = a_Html_get_attr(html, tag, tagsize, "media")) &&
3333 !dStriAsciiStr(attrbuf, "screen") && dStrAsciiCasecmp(attrbuf, "all")))
3334 return;
3335
3336 if (!(attrbuf = a_Html_get_attr(html, tag, tagsize, "href")) ||
3337 !(url = a_Html_url_new(html, attrbuf, NULL, 0)))
3338 return;
3339
3340 _MSG(" Html_tag_open_link(): addCssUrl %s\n", URL_STR(url));
3341
3342 html->addCssUrl(url);
3343 a_Url_free(url);
3344}
3345
3349static void Html_tag_open_base(DilloHtml *html, const char *tag, int tagsize)
3350{
3351 const char *attrbuf;
3352 DilloUrl *BaseUrl;
3353
3354 if (html->InFlags & IN_HEAD) {
3355 if ((attrbuf = a_Html_get_attr(html, tag, tagsize, "href"))) {
3356 bool_t html5 = html->DocType == DT_HTML &&
3357 html->DocTypeVersion >= 5.0f;
3358
3359 BaseUrl = html5 ? a_Html_url_new(html, attrbuf, NULL, 0) :
3360 a_Html_url_new(html, attrbuf, "", 1);
3361
3362 if (html5 || URL_SCHEME_(BaseUrl)) {
3363 /* Pass the URL_SpamSafe flag to the new base url */
3365 BaseUrl, URL_FLAGS(html->base_url) & URL_SpamSafe);
3366 a_Url_free(html->base_url);
3367 html->base_url = BaseUrl;
3368 } else {
3369 BUG_MSG("<base> URI is relative (it MUST be absolute).");
3370 a_Url_free(BaseUrl);
3371 }
3372 }
3373 } else {
3374 BUG_MSG("<base> not inside HEAD section.");
3375 }
3376}
3377
3378static void Html_tag_open_default(DilloHtml *html,const char *tag,int tagsize)
3379{
3381}
3382
3383/*
3384 * <SPAN>
3385 */
3386static void Html_tag_open_span(DilloHtml *html, const char *tag, int tagsize)
3387{
3388 const char *attrbuf;
3389
3391
3392 if (prefs.show_tooltip &&
3393 (attrbuf = a_Html_get_attr(html, tag, tagsize, "title"))) {
3394
3396 attrbuf);
3397 }
3398}
3399
3400/*
3401 * html5 sectioning stuff: article aside nav section header footer
3402 */
3403static void Html_tag_open_sectioning(DilloHtml *html, const char *tag,
3404 int tagsize)
3405{
3406 const char *attrbuf;
3407
3408 if (prefs.show_tooltip &&
3409 (attrbuf = a_Html_get_attr(html, tag, tagsize, "title"))) {
3410
3412 attrbuf);
3413 }
3414}
3415
3416/*
3417 * <DIV> (TODO: make a complete implementation)
3418 */
3419static void Html_tag_open_div(DilloHtml *html, const char *tag, int tagsize)
3420{
3421 a_Html_tag_set_align_attr (html, tag, tagsize);
3422 Html_tag_open_sectioning(html, tag, tagsize);
3423}
3424
3429{
3430 HT2TB(html)->addParbreak (9, html->wordStyle ());
3431}
3432
3433/*
3434 * <WBR> "The wbr element represents a line break opportunity."
3435 */
3436static void Html_tag_content_wbr(DilloHtml *html, const char *tag, int tagsize)
3437{
3438 HT2TB(html)->addBreakOption(html->wordStyle (), true);
3439}
3440
3441
3469static const TagInfo Tags[] = {
3470 {"a", B8(01011),'R', Html_tag_open_a, NULL, Html_tag_close_a},
3471 {"abbr", B8(01011),'R', Html_tag_open_abbr, NULL, NULL},
3472 /* acronym 010101 -- obsolete in HTML5 */
3473 {"address", B8(01110),'R', Html_tag_open_default, NULL, Html_tag_close_par},
3474 {"area", B8(01001),'F', Html_tag_open_default, Html_tag_content_area, NULL},
3475 {"article", B8(01110),'R', Html_tag_open_sectioning, NULL, NULL},
3476 {"aside", B8(01110),'R', Html_tag_open_sectioning, NULL, NULL},
3477 {"audio", B8(01111),'R', Html_tag_open_audio, NULL, Html_tag_close_media},
3478 {"b", B8(01011),'R', Html_tag_open_default, NULL, NULL},
3479 {"base", B8(10001),'F', Html_tag_open_base, NULL, NULL},
3480 /* basefont 010001 -- obsolete in HTML5 */
3481 /* bdo 010101 */
3482 {"big", B8(01011),'R', Html_tag_open_default, NULL, NULL},
3483 {"blockquote", B8(01110),'R', Html_tag_open_blockquote, NULL, NULL},
3484 {"body", B8(01110),'O', Html_tag_open_body, NULL, Html_tag_close_body},
3485 {"br", B8(01001),'F', Html_tag_open_default, Html_tag_content_br, NULL},
3486 {"button", B8(01111),'R', Html_tag_open_button,NULL,Html_tag_close_button},
3487 /* caption */
3488 {"center", B8(01110),'R', Html_tag_open_default, NULL, NULL},
3489 {"cite", B8(01011),'R', Html_tag_open_default, NULL, NULL},
3490 {"code", B8(01011),'R', Html_tag_open_default, NULL, NULL},
3491 /* col 010010 'F' */
3492 /* colgroup */
3493 {"dd", B8(01110),'O', Html_tag_open_dd, NULL, NULL},
3494 {"del", B8(01111),'R', Html_tag_open_default, NULL, NULL},
3495 {"dfn", B8(01011),'R', Html_tag_open_default, NULL, NULL},
3496 {"dir", B8(01100),'R', Html_tag_open_dir, NULL, Html_tag_close_par},
3497 /* TODO: complete <div> support! */
3498 {"div", B8(01110),'R', Html_tag_open_div, NULL, NULL},
3499 {"dl", B8(01100),'R', Html_tag_open_dl, NULL, Html_tag_close_par},
3500 {"dt", B8(01010),'O', Html_tag_open_dt, NULL, Html_tag_close_par},
3501 {"em", B8(01011),'R', Html_tag_open_default, NULL, NULL},
3502 {"embed", B8(01001),'F', Html_tag_open_embed, Html_tag_content_embed, NULL},
3503 /* fieldset */
3504 {"figcaption", B8(01110),'R', Html_tag_open_default, NULL, NULL},
3505 {"figure", B8(01110),'R', Html_tag_open_default, NULL, NULL},
3506 {"font", B8(01011),'R', Html_tag_open_font, NULL, NULL},
3507 {"footer", B8(01110),'R', Html_tag_open_sectioning, NULL, NULL},
3508 {"form", B8(01110),'R', Html_tag_open_form, NULL, Html_tag_close_form},
3509 {"frame", B8(01000),'F', Html_tag_open_frame, Html_tag_content_frame, NULL},
3510 {"frameset", B8(01110),'R', Html_tag_open_default, Html_tag_content_frameset,
3511 NULL},
3512 {"h1", B8(01010),'R', Html_tag_open_h, NULL, NULL},
3513 {"h2", B8(01010),'R', Html_tag_open_h, NULL, NULL},
3514 {"h3", B8(01010),'R', Html_tag_open_h, NULL, NULL},
3515 {"h4", B8(01010),'R', Html_tag_open_h, NULL, NULL},
3516 {"h5", B8(01010),'R', Html_tag_open_h, NULL, NULL},
3517 {"h6", B8(01010),'R', Html_tag_open_h, NULL, NULL},
3518 {"head", B8(10111),'O', Html_tag_open_head, NULL, Html_tag_close_head},
3519 {"header", B8(01110),'R', Html_tag_open_sectioning, NULL, NULL},
3520 {"hr", B8(01000),'F', Html_tag_open_hr, Html_tag_content_hr, NULL},
3521 {"html", B8(00110),'O', Html_tag_open_html, NULL, Html_tag_close_html},
3522 {"i", B8(01011),'R', Html_tag_open_default, NULL, NULL},
3523 {"iframe", B8(01111),'R', Html_tag_open_frame, Html_tag_content_frame, NULL},
3524 {"img", B8(01001),'F', Html_tag_open_img, Html_tag_content_img, NULL},
3525 {"input", B8(01001),'F', Html_tag_open_input, NULL, NULL},
3526 {"ins", B8(01111),'R', Html_tag_open_default, NULL, NULL},
3527 {"isindex", B8(11001),'F', Html_tag_open_isindex, NULL, NULL},
3528 {"kbd", B8(01011),'R', Html_tag_open_default, NULL, NULL},
3529 /* label 010101 */
3530 /* legend 01?? */
3531 {"li", B8(01110),'O', Html_tag_open_li, NULL, Html_tag_close_li},
3532 {"link", B8(10001),'F', Html_tag_open_link, NULL, NULL},
3533 {"main", B8(01110),'R', Html_tag_open_sectioning, NULL, NULL},
3534 {"map", B8(01101),'R', Html_tag_open_default, Html_tag_content_map,
3536 {"mark", B8(01011),'R', Html_tag_open_default, NULL, NULL},
3537 /* menu 1010 -- TODO: not exactly 1010, it can contain LI and inline */
3538 {"menu", B8(01100),'R', Html_tag_open_menu, NULL, Html_tag_close_par},
3539 {"meta", B8(11001),'F', Html_tag_open_meta, NULL, NULL},
3540 {"nav", B8(01110),'R', Html_tag_open_sectioning, NULL, NULL},
3541 /* noframes 1011 -- obsolete in HTML5 */
3542 /* noscript 1011 */
3543 {"object", B8(11111),'R', Html_tag_open_object, Html_tag_content_object,NULL},
3544 {"ol", B8(01100),'R', Html_tag_open_ol, NULL, NULL},
3545 {"optgroup", B8(01011),'O', Html_tag_open_optgroup, NULL,
3547 {"option", B8(01001),'O', Html_tag_open_option, NULL, Html_tag_close_option},
3548 {"p", B8(01010),'O', Html_tag_open_p, NULL, NULL},
3549 /* param 010001 'F' */
3550 {"pre", B8(01010),'R', Html_tag_open_pre, NULL, Html_tag_close_pre},
3551 {"q", B8(01011),'R', Html_tag_open_q, NULL, Html_tag_close_q},
3552 {"s", B8(01011),'R', Html_tag_open_default, NULL, NULL},
3553 {"samp", B8(01011),'R', Html_tag_open_default, NULL, NULL},
3554 {"script", B8(11101),'R', Html_tag_open_script,NULL,Html_tag_close_script},
3555 {"section", B8(01110),'R', Html_tag_open_sectioning, NULL, NULL},
3556 {"select", B8(01011),'R', Html_tag_open_select,NULL,Html_tag_close_select},
3557 {"small", B8(01011),'R', Html_tag_open_default, NULL, NULL},
3558 {"source", B8(01001),'F', Html_tag_open_source, Html_tag_content_source,NULL},
3559 {"span", B8(01011),'R', Html_tag_open_span, NULL, NULL},
3560 {"strike", B8(01011),'R', Html_tag_open_default, NULL, NULL},
3561 {"strong", B8(01011),'R', Html_tag_open_default, NULL, NULL},
3562 {"style", B8(10011),'R', Html_tag_open_style, NULL, Html_tag_close_style},
3563 {"sub", B8(01011),'R', Html_tag_open_default, NULL, NULL},
3564 {"sup", B8(01011),'R', Html_tag_open_default, NULL, NULL},
3565 {"table", B8(01100),'R', Html_tag_open_table, Html_tag_content_table, NULL},
3566 {"tbody", B8(01010),'O', Html_tag_open_tbody, Html_tag_content_tbody, NULL},
3567 {"td", B8(01110),'O', Html_tag_open_td, Html_tag_content_td, NULL},
3568 {"textarea", B8(01011),'R', Html_tag_open_textarea, Html_tag_content_textarea,
3570 {"tfoot", B8(01010),'O', Html_tag_open_tfoot, NULL, NULL},
3571 {"th", B8(01110),'O', Html_tag_open_th, Html_tag_content_th, NULL},
3572 {"thead", B8(01010),'O', Html_tag_open_thead, NULL, NULL},
3573 {"title", B8(10011),'R', Html_tag_open_title, NULL, Html_tag_close_title},
3574 {"tr", B8(01100),'O', Html_tag_open_tr, Html_tag_content_tr, NULL},
3575 {"tt", B8(01011),'R', Html_tag_open_default, NULL, NULL},
3576 {"u", B8(01011),'R', Html_tag_open_default, NULL, NULL},
3577 {"ul", B8(01100),'R', Html_tag_open_ul, NULL, NULL},
3578 {"var", B8(01011),'R', Html_tag_open_default, NULL, NULL},
3579 {"video", B8(01111),'R', Html_tag_open_video, NULL, Html_tag_close_media},
3580 {"wbr", B8(01011),'F', Html_tag_open_default, Html_tag_content_wbr, NULL}
3581};
3582#define NTAGS (sizeof(Tags)/sizeof(Tags[0]))
3583
3584/* Only available in C++11 and up */
3585#if __cpp_static_assert
3586static_assert(NTAGS == HTML_NTAGS,
3587 "Mismatch between number of tags in Tags and HTML_NTAGS");
3588#endif
3589
3590/*
3591 * Compares tag from buffer ('/' or '>' or space-ended string) [p1]
3592 * with tag from taglist (lowercase, zero ended string) [p2]
3593 * Return value: as strcmp()
3594 */
3595static int Html_tag_compare(const char *p1, const char *p2)
3596{
3597 while ( *p2 ) {
3598 if (D_ASCII_TOLOWER(*p1) != *p2)
3599 return(D_ASCII_TOLOWER(*p1) - *p2);
3600 ++p1;
3601 ++p2;
3602 }
3603 return !strchr(" >/\n\r\t", *p1);
3604}
3605
3610int a_Html_tag_index(const char *tag)
3611{
3612 int low, high, mid, cond;
3613
3614 /* Binary search */
3615 low = 0;
3616 high = NTAGS - 1; /* Last tag index */
3617 while (low <= high) {
3618 mid = (low + high) / 2;
3619 if ((cond = Html_tag_compare(tag, Tags[mid].name)) < 0 )
3620 high = mid - 1;
3621 else if (cond > 0)
3622 low = mid + 1;
3623 else
3624 return mid;
3625 }
3626 return -1;
3627}
3628
3637static int Html_triggers_optional_close(int old_idx, int cur_idx)
3638{
3639 int Flags = Tags[cur_idx].Flags;
3640 if (old_idx == i_P || old_idx == i_DT) {
3641 /* P and DT are closed by block elements (i.e. non inline)*/
3642 return (!(Flags & 1));
3643 } else if (old_idx == i_LI) {
3644 /* LI closes LI
3645 * Note: non-flow should also close it, but FF does not. */
3646 return (cur_idx == i_LI);
3647 } else if (old_idx == i_TD || old_idx == i_TH) {
3648 /* TD and TH are closed by: TD, TH and TR.
3649 * Note: non-flow should also close it, but FF does not. */
3650 return (cur_idx == i_TD || cur_idx == i_TH || cur_idx == i_TR || cur_idx == i_TBODY);
3651 } else if (old_idx == i_TR) {
3652 /* TR closes TR */
3653 return (cur_idx == i_TR || cur_idx == i_TBODY);
3654 } else if (old_idx == i_DD) {
3655 /* DD is closed by DD and DT */
3656 return (cur_idx == i_DD || cur_idx == i_DT);
3657 } else if (old_idx == i_OPTGROUP) {
3658 /* i_OPTGROUP can only contain OPTION */
3659 return (cur_idx != i_OPTION);
3660 } else if (old_idx == i_OPTION) {
3661 return 1; // OPTION always needs close
3662 }
3663
3664 /* Don't close HTML, HEAD and BODY. They're handled by Html_test_section().
3665 * TODO: TBODY is pending */
3666 return 0;
3667}
3668
3676static inline int Html_forbids_cross_nesting(const int InFlags,
3677 const int new_idx)
3678{
3679 int f = InFlags, ni = new_idx, oi = -1;
3680 if (f & (IN_A | IN_BUTTON | IN_SELECT | IN_TEXTAREA) &&
3681 (ni == i_A || ni == i_BUTTON || ni == i_SELECT || ni == i_TEXTAREA))
3682 oi = (f & IN_A ? i_A : f & IN_BUTTON ? i_BUTTON : f & IN_SELECT ?
3683 i_SELECT : f & IN_TEXTAREA ? i_TEXTAREA : 0);
3684 return oi;
3685}
3686
3695static void Html_tag_cleanup_to_idx(DilloHtml *html, int s_idx,
3696 int new_idx, int fi, char op)
3697{
3698 int s_top, ni = new_idx;
3699 while ((s_top = html->stack->size() - 1) >= s_idx) {
3700 int toptag_idx = S_TOP(html)->tag_idx;
3701 TagInfo toptag = Tags[toptag_idx];
3702
3703 if (fi >= 0) {
3704 // forbidden nesting
3705 if (toptag_idx != fi)
3706 BUG_MSG(" Nesting cleanup - forcing close of open tag: <%s>.",
3707 toptag.name);
3708 } else if (s_top == s_idx && op == 'c') {
3709 // target tag, no bug when closing.
3710 } else if (toptag.EndTag == 'O') {
3711 // optional close, that's OK
3712 } else if ((!(toptag.Flags & 4) &&
3713 (Tags[ni].Flags & 4 || !(Tags[ni].Flags & 1))) ||
3714 (Tags[ni].Flags & 1 && !(toptag.Flags & 2))) {
3715 // block {element, container} in non block container or
3716 // inline element in non inline container
3717 BUG_MSG((op == 'o') ?
3718 "Bad nesting: <%s> can't contain <%s>. -- closing <%s>." :
3719 "<%s> needs to be closed before </%s>. -- closing <%s>.",
3720 toptag.name, Tags[ni].name, toptag.name);
3721 } else {
3722 BUG_MSG(
3723 "<%s> should have been closed before </%s>. -- closing <%s>.",
3724 toptag.name, Tags[ni].name, toptag.name);
3725 }
3726 _MSG("op(%c): %s s_top=%d s_idx=%d\n", op, toptag.name, s_top, s_idx);
3727 if (toptag_idx == i_BODY &&
3728 !((html->InFlags & IN_EOF) || html->ReqTagClose)) {
3729 (s_idx == 1 ? html->PrevWasHtmlClose : html->PrevWasBodyClose) = true;
3730 break; // only pop {BODY,HTML} upon EOF or redundancy
3731 }
3732 if (toptag.close)
3733 toptag.close(html);
3734 Html_real_pop_tag(html);
3735 }
3736}
3737
3747static void Html_stack_cleanup_at_open(DilloHtml *html, int ni)
3748{
3749 if (!html->TagSoup)
3750 return;
3751
3752 int s_top = html->stack->size() - 1, s_idx;
3753 int fi = Html_forbids_cross_nesting(html->InFlags, ni);
3754 for (s_idx = s_top; s_idx > 0; --s_idx) {
3755 int ti = html->stack->getRef(s_idx)->tag_idx;
3756
3757 if (fi >= 0) {
3758 // forbidden cross nesting found
3759 if (ti != fi)
3760 continue; // don't allow, close
3761 --s_idx;
3762 BUG_MSG("Forbidden nesting: <%s> can't contain <%s>. -- closing "
3763 "<%s>.", Tags[fi].name, Tags[ni].name, Tags[fi].name);
3764
3765 } else if ((html->InFlags & IN_PRE) && ni == i_HR) {
3766 break; // allow Apache's bad HTML directory listings...
3767
3768 } else if (Tags[ti].EndTag == 'O') { // Element with optional close
3769 if (Html_triggers_optional_close(ti, ni))
3770 continue; // close
3771 } else if (!(Tags[ni].Flags & 1) && !(Tags[ti].Flags & 4)) {
3772 // Block element over a NON block container
3773 if (ti == i_A && html->DocTypeVersion >= 5.0f)
3774 break;
3775 continue; // close
3776 }
3777
3778 break;
3779 }
3780
3781 if (s_idx < s_top)
3782 Html_tag_cleanup_to_idx(html, s_idx + 1, ni, fi, 'o');
3783}
3784
3797static void Html_tag_cleanup_at_close(DilloHtml *html, int new_idx)
3798{
3799 int stack_idx, tag_idx, matched = 0, expected = 0;
3800 TagInfo new_tag = Tags[new_idx];
3801
3802 /* Look for the candidate tag to close */
3803 stack_idx = html->stack->size();
3804 while (--stack_idx) {
3805 tag_idx = html->stack->getRef(stack_idx)->tag_idx;
3806 if (tag_idx == new_idx) {
3807 /* matching tag found */
3808 matched = 1;
3809 break;
3810 } else if (Tags[tag_idx].EndTag == 'O') {
3811 /* close elements with optional close */
3812 continue;
3813 } else if ((new_idx == i_A && html->InFlags & IN_A) ||
3814 (new_idx == i_BUTTON && html->InFlags & IN_BUTTON) ||
3815 (new_idx == i_SELECT && html->InFlags & IN_SELECT) ||
3816 (new_idx == i_TEXTAREA && html->InFlags & IN_TEXTAREA)) {
3817 /* Let these elements close anything left open inside them */
3818 continue;
3819 } else if (Tags[new_idx].Flags & 4 && // Block container
3820 Tags[stack_idx].Flags & 3) { // Inline element or container
3821 /* Let a block container close inline elements left open inside it. */
3822 continue;
3823 } else {
3824 /* this is the tag that should have been closed */
3825 expected = 1;
3826 break;
3827 }
3828 }
3829
3830 if (matched) {
3831 Html_tag_cleanup_to_idx(html, stack_idx, new_idx, -1, 'c');
3832 } else if (expected) {
3833 BUG_MSG("Unexpected closing tag: </%s> -- expected </%s>.",
3834 new_tag.name, Tags[tag_idx].name);
3835 } else {
3836 BUG_MSG("Unexpected closing tag: </%s>.", new_tag.name);
3837 }
3838}
3839
3844static void Html_test_section(DilloHtml *html, int new_idx, int IsCloseTag)
3845{
3846 const char *tag;
3847 int tag_idx;
3848
3849 if (!(html->InFlags & IN_HTML) && html->DocType == DT_NONE)
3850 BUG_MSG("The required DOCTYPE declaration is missing. "
3851 "Handling as HTML4.");
3852
3853 if (!(html->InFlags & IN_HTML)) {
3854 tag = "<html>";
3855 tag_idx = a_Html_tag_index(tag + 1);
3856 if (tag_idx != new_idx || IsCloseTag) {
3857 /* implicit open */
3858 Html_force_push_tag(html, tag_idx);
3859 _MSG("Open : %*s%s\n", html->stack->size()," ",Tags[tag_idx].name);
3860 Tags[tag_idx].open (html, tag, strlen(tag));
3861 }
3862 }
3863
3864 if (Tags[new_idx].Flags & 16) {
3865 /* head element */
3866 if (!(html->InFlags & IN_HEAD) && html->Num_HEAD == 0) {
3867 tag = "<head>";
3868 tag_idx = a_Html_tag_index(tag + 1);
3869 if (tag_idx != new_idx || IsCloseTag) {
3870 /* implicit open of the head element */
3871 Html_force_push_tag(html, tag_idx);
3872 _MSG("Open : %*s%s\n", html->stack->size()," ",Tags[tag_idx].name);
3873 Tags[tag_idx].open (html, tag, strlen(tag));
3874 }
3875 }
3876
3877 } else if (Tags[new_idx].Flags & 8) {
3878 /* body element */
3879 if (html->InFlags & IN_HEAD) {
3880 tag = "</head>";
3881 tag_idx = a_Html_tag_index(tag + 2);
3882 Html_tag_cleanup_at_close(html, tag_idx);
3883 }
3884 tag = "<body>";
3885 tag_idx = a_Html_tag_index(tag + 1);
3886 if (tag_idx != new_idx || IsCloseTag) {
3887 /* implicit open */
3888 Html_force_push_tag(html, tag_idx);
3889 _MSG("Open : %*s%s\n", html->stack->size()," ",Tags[tag_idx].name);
3890 Tags[tag_idx].open (html, tag, strlen(tag));
3891 }
3892 }
3893}
3894
/*
 * Parse the attributes that may appear on any element: "id", "class",
 * "style", "xml:lang" and "lang".
 *
 * html:    parser state; its styleEngine receives id, class and style.
 * tag:     text of the tag being parsed.
 * tagsize: length of `tag`; each lookup is skipped when the tag is too
 *          short to hold the attribute at all.
 */
3898static void Html_parse_common_attrs(DilloHtml *html, char *tag, int tagsize)
3899{
3900 const char *attrbuf;
3901 char lang[3];
3902
3903 if (tagsize >= 8 && /* length of "<t id=i>" */
3904 (attrbuf = a_Html_get_attr(html, tag, tagsize, "id"))) {
3905 /* According to the SGML declaration of HTML 4, all NAME values
3906 * occurring outside entities must be converted to uppercase
3907 * (this is what "NAMECASE GENERAL YES" says). But the HTML 4
3908 * spec states in Sec. 7.5.2 that anchor ids are case-sensitive.
3909 * So we don't do it and hope for better specs in the future ...
3910 */
3911 Html_check_name_val(html, attrbuf, "id");
3912
3913 html->styleEngine->setId(attrbuf);
3914 }
3915
 /* "class" and "style" are only looked up when one of the two CSS
  * preferences below is enabled. */
3916 if (tagsize >= 11 && (prefs.parse_embedded_css || prefs.load_stylesheets)) {
3917 /* length of "<t class=i>" or "<t style=i>" */
3918 attrbuf = a_Html_get_attr(html, tag, tagsize, "class");
3919 if (attrbuf)
3920 html->styleEngine->setClass (attrbuf);
3921
3922 attrbuf = a_Html_get_attr(html, tag, tagsize, "style");
3923 if (attrbuf)
3924 html->styleEngine->setStyle (attrbuf);
3925 }
3926
3927 /* handle "xml:lang" and "lang" attributes
3928 * We use only the first two chars of the value to deal with
3929 * extended language tags (see http://www.rfc-editor.org/rfc/bcp/bcp47.txt)
3930 */
 /* lang[] holds 3 bytes and is zeroed here, so copying at most 2 chars
  * below always leaves it NUL-terminated. */
3931 memset(lang, 0, sizeof(lang));
3932 if (tagsize >= 14) {
3933 /* length of "<t xml:lang=i>" */
3934 attrbuf = a_Html_get_attr(html, tag, tagsize, "xml:lang");
3935 if (attrbuf)
3936 strncpy(lang, attrbuf, 2);
3937 }
3938 if (!lang[0] && tagsize >= 10) { /* 'xml:lang' prevails over 'lang' */
3939 /* length of "<t lang=i>" */
3940 attrbuf = a_Html_get_attr(html, tag, tagsize, "lang");
3941 if (attrbuf)
3942 strncpy(lang, attrbuf, 2);
3943 }
 /* NOTE(review): the statement guarded by the test below (listing line
  * 3945) is missing from this listing -- restore it from the original
  * file; presumably it hands `lang` to the style engine, TODO confirm. */
3944 if (lang[0])
3946}
3947
3952static void Html_check_html5_obsolete(DilloHtml *html, int ni)
3953{
3954 static int indexes[9] = {-1};
3955
3956 if (indexes[0] == -1) {
3957 indexes[0] = a_Html_tag_index("dir");
3958 indexes[1] = a_Html_tag_index("frame");
3959 indexes[2] = a_Html_tag_index("frameset");
3960 indexes[3] = a_Html_tag_index("isindex");
3961 indexes[4] = a_Html_tag_index("strike");
3962 indexes[5] = a_Html_tag_index("big");
3963 indexes[6] = a_Html_tag_index("center");
3964 indexes[7] = a_Html_tag_index("font");
3965 indexes[8] = a_Html_tag_index("tt");
3966 }
3967 for (int i = 0; i < 9; i++) {
3968 if (indexes[i] == ni) {
3969 BUG_MSG("<%s> is obsolete in HTML5.", Tags[ni].name);
3970 break;
3971 }
3972 }
3973}
3974
3976{
3978 false /* Perhaps true for widgets oof? */);
3979}
3980
3982{
3983 Html_add_textblock(html, false, 0, true);
3984}
3985
3987{
3988 Style *style = html->style ();
3989 Style *wordStyle = html->wordStyle ();
3990 Widget **ref_list_item;
3991 ListItem *list_item;
3992 int *list_number;
3993 char buf[16];
3994
3995 /* Get our parent tag's variables (used as state storage) */
3996 list_number = &html->stack->getRef(html->stack->size()-2)->list_number;
3997 ref_list_item = &html->stack->getRef(html->stack->size()-2)->ref_list_item;
3998
3999 HT2TB(html)->addParbreak (0, wordStyle);
4000
4001 list_item = new ListItem ((ListItem*)*ref_list_item,prefs.limit_text_width);
4002 HT2TB(html)->addWidget (list_item, style);
4003 HT2TB(html)->addParbreak (0, wordStyle);
4004 *ref_list_item = list_item;
4005 S_TOP(html)->textblock = html->dw = list_item;
4006
4008 // none
4010 // ordered
4011 numtostr((*list_number)++, buf, 16, style->listStyleType);
4012 list_item->initWithText (buf, wordStyle);
4013 } else {
4014 // unordered
4015 list_item->initWithWidget (new Bullet(), wordStyle);
4016 }
4017}
4018
/*
 * Process one tag: validate it, open the element and/or close it.
 *
 * html:    parser state.
 * tag:     text of the tag; its first byte is the '<'.
 * tagsize: length of `tag`.
 *
 * Returns at once when html->stop_parser is set. Unknown tags are ignored,
 * except that "<!doctype" is parsed while HTML has not been opened yet.
 * For an opening tag the open path runs and then falls through to the
 * close path, which acts on "</x>", on html->ReqTagClose, and on the
 * XML-style "<x/>" and "<x ... />" forms.
 */
4024static void Html_process_tag(DilloHtml *html, char *tag, int tagsize)
4025{
4026 int ti, ni; /* stack tag index and new tag index */
4027 char *start = tag + 1; /* discard the '<' */
4028 int IsCloseTag = (*start == '/');
4029
4030 dReturn_if (html->stop_parser == true);
4031
 /* IsCloseTag is 0 or 1, so this skips the '/' of a closing tag */
4032 ni = a_Html_tag_index(start + IsCloseTag);
4033 if (ni == -1) {
4034 /* TODO: doctype parsing is a bit fuzzy, but enough for the time being */
4035 if (!(html->InFlags & IN_HTML)) {
4036 if (tagsize > 9 && !dStrnAsciiCasecmp(tag, "<!doctype", 9))
4037 Html_parse_doctype(html, tag, tagsize);
4038 }
4039 /* Ignore unknown tags */
4040 return;
4041 }
4042 _MSG("Html_process_tag: %s%s\n", IsCloseTag ? "/" : "", Tags[ni].name);
4043
4044 if (!IsCloseTag && html->DocType == DT_HTML && html->DocTypeVersion >= 5.0f)
4045 Html_check_html5_obsolete(html, ni);
4046
 /* i: 1 = previous tag was </html>, 2 = it was </body>, 0 = neither.
  * An HTML tag right after </body> is not reported. */
4047 int i = html->PrevWasHtmlClose ? 1 : html->PrevWasBodyClose ? 2 : 0;
4048 if (i == 1 || (i == 2 && ni != i_HTML))
4049 BUG_MSG("Content after </%s> tag.", i == 1 ? "html" : "body");
4050 html->PrevWasHtmlClose = html->PrevWasBodyClose = false;
4051
4052 /* Handle HTML, HEAD and BODY. Elements with optional open and close */
4053 if (!(html->InFlags & IN_BODY) /* && parsing HTML */)
4054 Html_test_section(html, ni, IsCloseTag);
4055
4056 /* Tag processing */
4057 ti = S_TOP(html)->tag_idx;
4058 switch (IsCloseTag) {
4059 case 0:
4060 /* Open function */
4061
4062 /* Cleanup before opening a new tag */
 /* NOTE(review): the statement guarded by the test below (listing line
  * 4064) is missing from this listing -- restore it from the original
  * file. */
4063 if (ti != -1)
4065
4066 /* TODO: this is only raising a warning, but allows the element.
4067 * Note: Apache uses IMG inside PRE. */
4068 if ((html->InFlags & IN_PRE) && Html_tag_pre_excludes(html, ni))
4069 BUG_MSG("<pre> is not allowed to contain <%s>.", Tags[ni].name);
4070
4071 /* Push the tag into the stack */
4072 Html_push_tag(html, ni);
4073
4074 html->startElement (ni);
4075 _MSG("Open : %*s%s\n", html->stack->size(), " ", Tags[ni].name);
4076
4077 /* Parse attributes that can appear on any tag */
4078 Html_parse_common_attrs(html, tag, tagsize);
4079
4080 /* Call the open function for this tag */
4081 _MSG("Html_process_tag Open : %s\n", Tags[ni].name);
4082 Tags[ni].open (html, tag, tagsize);
4083
 /* Act on the computed CSS 'display' value, unless the element is
  * already marked display_none. */
4084 if (! S_TOP(html)->display_none) {
4085 switch (html->style ()->display) {
4086 case DISPLAY_BLOCK:
4087 Html_display_block(html);
4088 break;
 /* NOTE(review): listing lines 4089-4090 (a case label and its
  * statement) are missing here -- restore from the original file. */
4091 break;
4092 case DISPLAY_LIST_ITEM:
 /* NOTE(review): listing line 4093 (the statement of this case) is
  * missing here -- restore from the original file. */
4094 break;
4095 case DISPLAY_NONE:
4096 S_TOP(html)->display_none = true;
4097 break;
4098 case DISPLAY_INLINE:
 /* a floated inline element is laid out as a block */
4099 if (html->style()->vloat != FLOAT_NONE)
4100 Html_display_block(html);
4101 break;
4102 default:
4103 break;
4104 }
4105
4106 if (Tags[ni].content && ! S_TOP(html)->display_none) {
4107 Tags[ni].content (html, tag, tagsize);
4108 }
4109 }
4110
4111 if (html->stop_parser)
4112 break;
4113
 /* Switch to PRE parsing when the style asks for preserved white space,
  * except in VERBATIM mode, which is left untouched. */
4114 if (S_TOP(html)->parse_mode == DILLO_HTML_PARSE_MODE_VERBATIM) {
4115 /* don't change anything */
4116 } else if (S_TOP(html)->parse_mode != DILLO_HTML_PARSE_MODE_PRE &&
4117 (html->style ()->whiteSpace == WHITE_SPACE_PRE ||
4118 html->style ()->whiteSpace == WHITE_SPACE_PRE_WRAP)) {
4119 S_TOP(html)->parse_mode = DILLO_HTML_PARSE_MODE_PRE;
4120 html->pre_column = 0;
4121 html->PreFirstChar = true;
4122 }
4123
4124 if (html->styleEngine->getId ())
4125 Html_add_anchor(html, html->styleEngine->getId ());
4126
4127 /* Request immediate close for elements with forbidden close tag. */
4128 /* TODO: XHTML always requires close tags. A simple implementation
4129 * of the commented clause below will make it work. */
4130 if (/* parsing HTML && */ Tags[ni].EndTag == 'F')
4131 html->ReqTagClose = true;
4132
4133 /* Don't break! Open tags may also close themselves */
4134 /* fallthrough */
4135
4136 default:
4137 /* Close function */
4138
4139 /* Test for </x>, ReqTagClose, <x /> and <x/> */
4140 if (*start == '/' || /* </x> */
4141 html->ReqTagClose || /* request */
4142 (tag[tagsize-2] == '/' && /* XML: */
4143 (strchr(" \"'", tag[tagsize-3]) || /* [ "']/> */
4144 (size_t)tagsize == strlen(Tags[ni].name) + 3))) { /* <x/> */
4145
4146 _MSG("Html_process_tag Close: %s\n", Tags[ni].name);
4147 Html_tag_cleanup_at_close(html, ni);
4148 /* This was a close tag */
4149 html->ReqTagClose = false;
4150 }
4151 }
4152}
4153
/*
 * Scan a tag for the attribute `attrname` and return its value.
 *
 * html:              parser state; the value is built in html->attr_data,
 *                    which is emptied on entry.
 * tag, tagsize:      the tag text and its length; scanning starts at
 *                    index 1.
 * attrname:          attribute name, compared ASCII case-insensitively;
 *                    must not be empty.
 * tag_parsing_flags: HTML_ParseEntities expands entities in the value,
 *                    HTML_LeftTrim / HTML_RightTrim strip white space from
 *                    the corresponding end of the value.
 *
 * Returns a pointer to the string held by html->attr_data when the
 * attribute was found, NULL otherwise.
 *
 * NOTE(review): the declaration of `state` (listing line 4172) is missing
 * from this listing -- restore it from the original file.
 */
4164static const char *Html_get_attr2(DilloHtml *html,
4165 const char *tag,
4166 int tagsize,
4167 const char *attrname,
4168 int tag_parsing_flags)
4169{
4170 int i, entsize, Found = 0, delimiter = 0, attr_pos = 0;
4171 Dstr *Buf = html->attr_data;
4173
4174 dReturn_val_if_fail(*attrname, NULL);
4175
4176 dStr_truncate(Buf, 0);
4177
4178 for (i = 1; i < tagsize; ++i) {
4179 switch (state) {
4180 case SEEK_ATTR_START:
 /* skip the rest of the current token */
4181 if (isspace(tag[i]))
4182 state = SEEK_TOKEN_START;
4183 else if (tag[i] == '=')
4184 state = SEEK_VALUE_START;
4185 break;
4186
4187 case MATCH_ATTR_NAME:
 /* whole name consumed and followed by a delimiter: it's a match */
4188 if (!attrname[attr_pos] &&
4189 (tag[i] == '=' || isspace(tag[i]) || tag[i] == '>')) {
4190 Found = 1;
4191 state = SEEK_TOKEN_START;
 /* re-examine this character in the new state */
4192 --i;
4193 } else if (!tag[i]) {
4194 state = SEEK_ATTR_START; // NULL byte is not allowed
4195 } else {
4196 if (D_ASCII_TOLOWER(tag[i]) != D_ASCII_TOLOWER(attrname[attr_pos]))
4197 state = SEEK_ATTR_START;
4198 attr_pos++;
4199 }
4200 break;
4201
4202 case SEEK_TOKEN_START:
4203 if (tag[i] == '=') {
4204 state = SEEK_VALUE_START;
4205 } else if (!isspace(tag[i])) {
 /* A new token starts here. If the attribute was already found, it
  * has no value and the scan is over. */
4206 attr_pos = 0;
4207 state = (Found) ? FINISHED : MATCH_ATTR_NAME;
4208 --i;
4209 }
4210 break;
4211 case SEEK_VALUE_START:
4212 if (!isspace(tag[i])) {
 /* Quoted values end at the matching quote; for unquoted ones the
  * delimiter is ' ' and the current character belongs to the value,
  * so step back to re-read it. */
4213 delimiter = (tag[i] == '"' || tag[i] == '\'') ? tag[i] : ' ';
4214 i -= (delimiter == ' ');
4215 state = (Found) ? GET_VALUE : SKIP_VALUE;
4216 }
4217 break;
4218
4219 case SKIP_VALUE:
4220 if ((delimiter == ' ' && isspace(tag[i])) || tag[i] == delimiter)
4221 state = SEEK_TOKEN_START;
4222 break;
4223 case GET_VALUE:
4224 if ((delimiter == ' ' && (isspace(tag[i]) || tag[i] == '>')) ||
4225 tag[i] == delimiter) {
4226 state = FINISHED;
4227 } else if (tag[i] == '&' &&
4228 (tag_parsing_flags & HTML_ParseEntities)) {
4229 const char *entstr;
4230 const bool_t is_attr = TRUE;
4231
4232 if ((entstr = Html_parse_entity(html, tag+i, tagsize-i, &entsize,
4233 is_attr))) {
4234 dStr_append(Buf, entstr);
 /* skip the entity; the loop's ++i accounts for the last char */
4235 i += entsize-1;
4236 } else {
4237 dStr_append_c(Buf, tag[i]);
4238 }
4239 } else if (tag[i] == '\r' || tag[i] == '\t') {
 /* CR and TAB inside a value are stored as a space */
4240 dStr_append_c(Buf, ' ');
4241 } else if (tag[i] == '\n') {
4242 /* ignore */
4243 } else {
4244 dStr_append_c(Buf, tag[i]);
4245 }
4246 break;
4247
4248 case FINISHED:
 /* force the for loop to end */
4249 i = tagsize;
4250 break;
4251 }
4252 }
4253
4254 if (tag_parsing_flags & HTML_LeftTrim)
4255 while (isspace(Buf->str[0]))
4256 dStr_erase(Buf, 0, 1);
4257 if (tag_parsing_flags & HTML_RightTrim)
4258 while (Buf->len && isspace(Buf->str[Buf->len - 1]))
4259 dStr_truncate(Buf, Buf->len - 1);
4260
4261 return (Found) ? Buf->str : NULL;
4262}
4263
/*
 * Look up the attribute `attrname` in `tag` by calling Html_get_attr2()
 * and return its result.
 *
 * NOTE(review): the last argument of the call (the parsing flags, listing
 * line 4273) is missing from this listing -- restore it from the original
 * file.
 */
4267const char *a_Html_get_attr(DilloHtml *html,
4268 const char *tag,
4269 int tagsize,
4270 const char *attrname)
4271{
4272 return Html_get_attr2(html, tag, tagsize, attrname,
4274}
4275
4282 const char *tag,
4283 int tagsize,
4284 const char *attrname,
4285 const char *def)
4286{
4287 const char *attrbuf = a_Html_get_attr(html, tag, tagsize, attrname);
4288
4289 return attrbuf ? dStrdup(attrbuf) : dStrdup(def);
4290}
4291
4300static void Html_callback(int Op, CacheClient_t *Client)
4301{
4302 DilloHtml *html = (DilloHtml*)Client->CbData;
4303
4304 if (Op) { /* EOF */
4305 html->write((char*)Client->Buf, Client->BufSize, 1);
4306 html->finishParsing(Client->Key);
4307 } else {
4308 html->write((char*)Client->Buf, Client->BufSize, 0);
4309 }
4310}
4311
/*
 * Split `buf` into tokens (white space, comments, tags and words) and
 * hand each complete token to the matching Html_process_* function.
 *
 * html:    parser state.
 * buf:     data to parse; a word is NUL-terminated in place while it is
 *          processed and the overwritten byte is restored afterwards.
 * bufsize: number of bytes to parse.
 * Eof:     nonzero when no more data will follow; a word reaching the end
 *          of the buffer is then processed too.
 *
 * Returns token_start: the offset just past the last token that was
 * processed.
 *
 * NOTE(review): strcspn() and the buf[buf_index + 1] look-aheads read up
 * to a NUL byte, so this presumably relies on `buf` being NUL-terminated
 * at or after `bufsize` -- confirm against the caller.
 */
4316static int Html_write_raw(DilloHtml *html, char *buf, int bufsize, int Eof)
4317{
4318 char ch = 0, *p, *text;
4319 int token_start, buf_index;
4320
4321 /* Now, 'buf' and 'bufsize' define a buffer aligned to start at a token
4322 * boundary. Iterate through tokens until end of buffer is reached. */
4323 buf_index = 0;
4324 token_start = buf_index;
4325 while ((buf_index < bufsize) && !html->stop_parser) {
4326 /* invariant: buf_index == bufsize || token_start == buf_index */
4327
 /* NOTE(review): the rest of this condition (listing line 4329) is
  * missing from this listing -- restore it from the original file. */
4328 if (S_TOP(html)->parse_mode ==
4330 /* Non HTML code here, let's skip until closing tag */
4331 do {
4332 const char *tag = Tags[S_TOP(html)->tag_idx].name;
4333 buf_index += strcspn(buf + buf_index, "<");
 /* not enough data left to hold the closing tag: wait for more */
4334 if (buf_index + (int)strlen(tag) + 3 > bufsize) {
4335 buf_index = bufsize;
4336 } else if (strncmp(buf + buf_index, "</", 2) == 0 &&
4337 Html_match_tag(tag, buf+buf_index+2, strlen(tag)+1)) {
4338 /* copy VERBATIM text into the stash buffer */
4339 text = dStrndup(buf + token_start, buf_index - token_start);
4340 dStr_append(html->Stash, text);
4341 dFree(text);
4342 token_start = buf_index;
4343 break;
4344 } else
4345 ++buf_index;
4346 } while (buf_index < bufsize);
4347
4348 if (buf_index == bufsize)
4349 break;
4350 }
4351
4352 if (isspace(buf[buf_index])) {
4353 /* whitespace: group all available whitespace */
4354 while (++buf_index < bufsize && isspace(buf[buf_index])) ;
4355 Html_process_space(html, buf + token_start, buf_index - token_start);
4356 token_start = buf_index;
4357
 /* '<' only starts a tag when followed by a letter, '/', '!' or '?' */
4358 } else if (buf[buf_index] == '<' && (ch = buf[buf_index + 1]) &&
4359 (isalpha(ch) || strchr("/!?", ch)) ) {
4360 /* Tag */
4361 if (buf_index + 3 < bufsize && !strncmp(buf + buf_index, "<!--", 4)) {
4362 /* Comment: search for close of comment, skipping over
4363 * everything except a matching "-->" tag. */
4364 while ( (p = (char*) memchr(buf + buf_index, '>',
4365 bufsize - buf_index)) ){
4366 buf_index = p - buf + 1;
4367 if (p[-1] == '-' && p[-2] == '-') break;
4368 }
4369 if (p) {
4370 /* Got the whole comment. Let's throw it away! :) */
4371 token_start = buf_index;
4372 } else
 /* comment not closed yet: leave token_start on its "<!--" */
4373 buf_index = bufsize;
4374 } else {
4375 /* Tag: search end of tag (skipping over quoted strings) */
4376 html->CurrOfs = html->Start_Ofs + token_start;
4377
4378 while ( buf_index < bufsize ) {
4379 buf_index++;
4380 buf_index += strcspn(buf + buf_index, ">\"'<");
4381 if ((ch = buf[buf_index]) == '>') {
4382 break;
4383 } else if (ch == '"' || ch == '\'') {
4384 /* Skip over quoted string */
4385 buf_index++;
4386 buf_index += strcspn(buf + buf_index,
4387 (ch == '"') ? "\">" : "'>");
4388 if (buf[buf_index] == '>') {
4389 /* Unterminated string value? Let's look ahead and test:
4390 * (<: unterminated, closing-quote: terminated) */
4391 int offset = buf_index + 1;
4392 offset += strcspn(buf + offset,
4393 (ch == '"') ? "\"<" : "'<");
4394 if (buf[offset] == ch || !buf[offset]) {
4395 buf_index = offset;
4396 } else {
4397 BUG_MSG("Attribute lacks closing quote.");
4398 break;
4399 }
4400 }
4401 } else if (ch == '<') {
4402 /* unterminated tag detected */
4403 p = dStrndup(buf+token_start+1,
4404 strcspn(buf+token_start+1, " <\n\r\t"));
4405 BUG_MSG("<%s> lacks its closing '>'.", p);
4406 dFree(p);
 /* step back so the new '<' is left for the next token */
4407 --buf_index;
4408 break;
4409 }
4410 }
 /* end of tag found inside the buffer: process it; otherwise
  * token_start is left on the tag's '<' */
4411 if (buf_index < bufsize) {
4412 buf_index++;
4413 Html_process_tag(html, buf + token_start,
4414 buf_index - token_start);
4415 token_start = buf_index;
4416 }
4417 }
4418 } else {
4419 /* A Word: search for whitespace or tag open */
4420 html->CurrOfs = html->Start_Ofs + token_start;
4421
4422 while (++buf_index < bufsize) {
4423 buf_index += strcspn(buf + buf_index, " <\n\r\t\f\v");
 /* a '<' that does not start a tag is part of the word */
4424 if (buf[buf_index] == '<' && (ch = buf[buf_index + 1]) &&
4425 !isalpha(ch) && !strchr("/!?", ch))
4426 continue;
4427 break;
4428 }
4429 if (buf_index < bufsize || Eof) {
4430 /* successfully found end of token */
 /* temporarily NUL-terminate the word in place */
4431 ch = buf[buf_index];
4432 buf[buf_index] = 0;
4433 Html_process_word(html, buf + token_start,
4434 buf_index - token_start);
4435 buf[buf_index] = ch;
4436 token_start = buf_index;
4437 }
4438 }
4439 }/*while*/
4440
4441 HT2TB(html)->flush ();
4442
4443 return token_start;
4444}
4445
4446
#define B8(d)
Definition binaryconst.h:32
#define _MSG(...)
Definition bookmarks.c:45
#define MSG(...)
Definition bookmarks.c:46
bool_t a_Bw_expecting(BrowserWindow *bw)
Definition bw.c:334
void a_Bw_close_client(BrowserWindow *bw, int ClientKey)
Close a cache-client upon successful retrieval.
Definition bw.c:167
void a_Bw_add_url(BrowserWindow *bw, const DilloUrl *Url)
Add an URL to the browser window's list.
Definition bw.c:209
void a_Bw_add_client(BrowserWindow *bw, int Key, int Root)
Add a reference to a cache-client.
Definition bw.c:128
void a_Bw_remove_doc(BrowserWindow *bw, void *vdoc)
Remove a document from the bw's list.
Definition bw.c:263
void a_Bw_add_doc(BrowserWindow *bw, void *vdoc)
Add a document to the browser window's list.
Definition bw.c:221
void(* CA_Callback_t)(int Op, CacheClient_t *Client)
Callback type for cache clients.
Definition cache.h:43
int a_Capi_get_buf(const DilloUrl *Url, char **PBuf, int *BufSize)
Get the cache's buffer for the URL, and its size.
Definition capi.c:543
const char * a_Capi_set_content_type(const DilloUrl *url, const char *ctype, const char *from)
Set the Content-Type for the URL.
Definition capi.c:567
int a_Capi_dpi_verify_request(BrowserWindow *bw, DilloUrl *url)
Safety test: only allow GET|POST dpi-urls from dpi-generated pages.
Definition capi.c:233
void a_Capi_unref_buf(const DilloUrl *Url)
Unref the cache's buffer when no longer using it.
Definition capi.c:551
int a_Capi_open_url(DilloWeb *web, CA_Callback_t Call, void *CbData)
Most used function for requesting a URL.
Definition capi.c:394
int a_Capi_get_flags_with_redirection(const DilloUrl *Url)
Same as a_Capi_get_flags() but following redirections.
Definition capi.c:532
#define CAPI_IsCached
Definition capi.h:15
#define CAPI_Completed
Definition capi.h:19
A list of CssProperty objects.
Definition css.hh:331
bool enter(dw::core::Widget *widget, int link, int img, int x, int y)
Called, when a link is entered, left, or the position has changed.
Definition html.cc:740
bool press(dw::core::Widget *widget, int link, int img, int x, int y, dw::core::EventButton *event)
Handle the "press" signal.
Definition html.cc:760
bool click(dw::core::Widget *widget, int link, int img, int x, int y, dw::core::EventButton *event)
Handle the "click" signal.
Definition html.cc:794
bool ReqTagClose
Flag to close the stack's top tag.
void freeParseData()
Free parsing data.
Definition html.cc:627
int InFlags
tracks which elements we are in
DilloHtmlDocumentType DocType
DilloUrl * base_url
int getCurrLineNumber()
Return the line number of the tag/word being processed by the parser.
Definition html.cc:605
bool stop_parser
void bugMessage(const char *format,...)
Collect HTML error strings.
Definition html.cc:153
lout::misc::SimpleVector< DilloHtmlState > * stack
void write(char *Buf, int BufSize, int Eof)
Process the newly arrived html and put it into the page structure.
Definition html.cc:578
dw::core::style::Style * backgroundStyle()
char * Start_Buf
int formNew(DilloHtmlMethod method, const DilloUrl *action, DilloHtmlEnc enc, const char *charset)
Allocate and insert form information.
Definition html.cc:667
void finishParsing(int ClientKey)
Finish parsing a HTML page.
Definition html.cc:641
bool PreFirstChar
used to skip the first CR or CRLF in PRE tags
void startElement(int tag)
size_t CurrOfs
void addCssUrl(const DilloUrl *url)
Save URL in a vector (may be loaded later).
Definition html.cc:733
void initDw()
Miscellaneous initializations for Dw.
Definition html.cc:522
bool PrevWasCR
Flag to help parsing of "\r\n" in PRE tags.
float DocTypeVersion
Dstr * Stash
bool TagSoup
Flag to enable the parser's cleanup functions.
bool_t unloadedImages()
Definition html.cc:689
int pre_column
current column, used in PRE tags with tabs
DilloUrl * page_url
lout::misc::SimpleVector< DilloHtmlInput * > * inputs_outside_form
bool PrevWasHtmlClose
set when </html> is found
void restyle()
uchar_t Num_BODY
dw::core::style::Style * wordStyle()
bool StashSpace
BrowserWindow * bw
int32_t non_css_visited_color
as provided by vlink attribute in BODY
size_t OldLine
bool loadCssFromStash
current stash content should be loaded as CSS
lout::misc::SimpleVector< DilloHtmlForm * > * forms
dw::ImageMapsList maps
DilloHtml(BrowserWindow *bw, const DilloUrl *url, const char *content_type)
Create and initialize a new DilloHtml class.
Definition html.cc:432
~DilloHtml()
Free memory used by the DilloHtml class.
Definition html.cc:536
uchar_t Num_TITLE
dw::core::style::Style * style()
size_t OldOfs
uchar_t Num_HTML
element counters: used for validation purposes.
lout::misc::SimpleVector< DilloUrl * > * links
dw::core::Widget * dw
int32_t non_css_link_color
as provided by link attribute in BODY
char * content_type
Dstr * attr_data
Buffer for attribute value.
bool PrevWasBodyClose
set when </body> is found
lout::misc::SimpleVector< DilloUrl * > * cssUrls
int32_t visited_color
as computed according to CSS
uchar_t Num_HEAD
char * charset
bool InVisitedLink
used to 'contrast_visited_colors'
StyleEngine * styleEngine
lout::misc::SimpleVector< DilloHtmlImage * > * images
HtmlLinkReceiver linkReceiver
DilloHtmlForm * getCurrentForm()
Get the current form.
Definition html.cc:684
void loadImages(const DilloUrl *pattern)
Load images if they were disabled.
Definition html.cc:702
This class provides the glue between HTML parser and CSS subsystem.
void parse(DilloHtml *html, DilloUrl *url, const char *buf, int buflen, CssOrigin origin)
void setPseudoVisited()
set the CSS pseudo class :visited.
void setNonCssHint(CssPropertyName name, CssValueType type, CssPropertyValue value)
void setId(const char *id)
const char * getId()
dw::core::style::Color * backgroundColor()
void endElement(int tag)
tell the styleEngine that a html element has ended.
void setPseudoLink()
set the CSS pseudo class :link.
void setStyle(const char *style)
void setClass(const char *klass)
void inheritBackgroundColor()
Use of the background color of the parent style as default.
dw::core::style::StyleImage * backgroundImage(dw::core::style::BackgroundRepeat *bgRepeat, dw::core::style::BackgroundAttachment *bgAttachment, dw::core::style::Length *bgPositionX, dw::core::style::Length *bgPositionY)
Displays different kind of bullets.
Definition bullet.hh:15
void addShapeToCurrentMap(core::Shape *shape, int link)
Add a shape to the current map-.
Definition image.cc:111
void startNewMap(lout::object::Object *key)
Start a new map and make it the current one.
Definition image.cc:99
void setCurrentMapDefaultLink(int link)
Set default link for current map-.
Definition image.cc:119
Displays an instance of dw::core::Imgbuf.
Definition image.hh:120
void setIsMap()
Sets image as server side image map.
Definition image.cc:546
void setUseMap(ImageMapsList *list, Object *key)
Sets image as client side image map.
Definition image.cc:559
void forceMapRedraw()
Definition image.hh:179
void initWithText(const char *text, core::style::Style *style)
Definition listitem.cc:58
void initWithWidget(core::Widget *widget, core::style::Style *style)
Definition listitem.cc:48
Widget for drawing (horizontal) rules.
Definition ruler.hh:21
A Widget for rendering text blocks, i.e.
Definition textblock.hh:206
void addWidget(core::Widget *widget, core::style::Style *style)
Add a widget (word type) to the page.
void addSpace(core::style::Style *style)
?
void addText(const char *text, size_t len, core::style::Style *style)
Add a word to the page structure.
void addParbreak(int space, core::style::Style *style)
Cause a paragraph break.
static int CLASS_ID
Definition textblock.hh:867
dw::core::Shape implementation for simple circles.
Definition types.hh:91
Represents a button press or release event.
Definition events.hh:58
ButtonState state
Definition events.hh:42
dw::core::Shape implementation for polygons.
Definition types.hh:105
void addPoint(int x, int y)
Definition types.cc:156
dw::core::Shape implementation for simple rectangles.
Definition types.hh:70
Abstract interface for different shapes.
Definition types.hh:59
The base class of all dillo widgets.
Definition widget.hh:44
virtual void setStyle(style::Style *style)
Change the style of a widget.
Definition widget.cc:1316
style::Color * getFgColor()
Get the actual foreground color of a widget.
Definition widget.cc:1450
Widget * getParent()
Definition widget.hh:587
Layout * getLayout()
Definition widget.hh:602
style::Color * getBgColor()
Get the actual background of a widget.
Definition widget.cc:1431
ListStyleType listStyleType
Definition style.hh:568
static Style * create(StyleAttrs *attrs)
Definition style.hh:629
Simple (simpler than container::untyped::Vector and container::typed::Vector) template based vector.
Definition misc.hh:115
void setSize(int newSize)
Set the size explicitly.
Definition misc.hh:188
void increase()
Increase the vector size by one.
Definition misc.hh:181
void set(int i, T t)
Store an object in the vector.
Definition misc.hh:267
T get(int i) const
Return the one element, explicitly.
Definition misc.hh:222
int size() const
Return the number of elements put into this vector.
Definition misc.hh:162
T * getRef(int i) const
Return the reference of one element.
Definition misc.hh:211
int32_t a_Color_parse(const char *str, int32_t default_color, int *err)
Parse a color string.
Definition colors.c:258
int32_t a_Color_vc(int32_t candidate, int32_t C_txt, int32_t C_lnk, int32_t C_bg)
Return a suitable "visited link" color.
Definition colors.c:344
@ CSS_PROPERTY_LIST_STYLE_TYPE
Definition css.hh:210
@ CSS_PROPERTY_MARGIN_BOTTOM
Definition css.hh:211
@ CSS_PROPERTY_BORDER_RIGHT_STYLE
Definition css.hh:178
@ CSS_PROPERTY_MARGIN_RIGHT
Definition css.hh:213
@ PROPERTY_X_LANG
Definition css.hh:249
@ CSS_PROPERTY_BORDER_RIGHT_WIDTH
Definition css.hh:179
@ CSS_PROPERTY_BORDER_LEFT_WIDTH
Definition css.hh:176
@ PROPERTY_X_LINK
Definition css.hh:248
@ PROPERTY_X_TOOLTIP
Definition css.hh:251
@ CSS_PROPERTY_MARGIN_LEFT
Definition css.hh:212
@ CSS_PROPERTY_HEIGHT
Definition css.hh:204
@ CSS_PROPERTY_WIDTH
Definition css.hh:242
@ PROPERTY_X_IMG
Definition css.hh:250
@ CSS_PROPERTY_BORDER_TOP_WIDTH
Definition css.hh:183
@ CSS_PROPERTY_MARGIN_TOP
Definition css.hh:214
@ CSS_PROPERTY_VERTICAL_ALIGN
Definition css.hh:239
@ CSS_PROPERTY_BORDER_LEFT_STYLE
Definition css.hh:175
@ CSS_PROPERTY_TEXT_ALIGN
Definition css.hh:232
@ CSS_PROPERTY_BORDER_TOP_STYLE
Definition css.hh:182
@ CSS_PROPERTY_BACKGROUND_COLOR
Definition css.hh:166
@ CSS_PROPERTY_COLOR
Definition css.hh:188
@ CSS_PROPERTY_BORDER_BOTTOM_STYLE
Definition css.hh:171
@ CSS_PROPERTY_BORDER_BOTTOM_WIDTH
Definition css.hh:172
@ CSS_PROPERTY_FONT_FAMILY
Definition css.hh:197
@ CSS_TYPE_ENUM
Value is i, if represented by enum_symbols[i].
Definition css.hh:39
@ CSS_TYPE_INTEGER
This type is only used internally, for x-* properties.
Definition css.hh:37
@ CSS_TYPE_LENGTH_PERCENTAGE
<length> or <percentage>.
Definition css.hh:43
@ CSS_TYPE_STRING
<string>
Definition css.hh:58
@ CSS_TYPE_SYMBOL
Symbols, which are directly copied (as opposed to CSS_TYPE_ENUM and CSS_TYPE_MULTI_ENUM).
Definition css.hh:59
@ CSS_TYPE_COLOR
Represented as integer.
Definition css.hh:55
CssLengthType CSS_LENGTH_TYPE(CssLength l)
Definition css.hh:134
CssLength CSS_CREATE_LENGTH(float v, CssLengthType t)
Definition css.hh:102
@ CSS_LENGTH_TYPE_PX
Definition css.hh:76
@ CSS_LENGTH_TYPE_RELATIVE
This does not exist in CSS but is used in HTML.
Definition css.hh:88
@ CSS_LENGTH_TYPE_PERCENTAGE
Definition css.hh:87
@ CSS_LENGTH_TYPE_AUTO
This can be used as a simple value.
Definition css.hh:90
float CSS_LENGTH_VALUE(CssLength l)
Definition css.hh:138
@ CSS_ORIGIN_AUTHOR
Definition css.hh:33
unsigned int uint_t
Definition d_size.h:20
unsigned char bool_t
Definition d_size.h:21
char * a_Url_decode_hex_str(const char *str, size_t *p_sz)
Definition datauri.c:137
static const int bufsize
Definition decode.c:21
char * dStrconcat(const char *s1,...)
Concatenate a NULL-terminated list of strings.
Definition dlib.c:102
void dFree(void *mem)
Definition dlib.c:68
int dStrAsciiCasecmp(const char *s1, const char *s2)
Definition dlib.c:203
void dStr_sprintfa(Dstr *ds, const char *format,...)
Printf-like function that appends.
Definition dlib.c:464
void dStr_append(Dstr *ds, const char *s)
Append a C string to a Dstr.
Definition dlib.c:316
char * dStrdup(const char *s)
Definition dlib.c:77
Dstr * dStr_sized_new(int sz)
Create a new string with a given size.
Definition dlib.c:254
int dStrnAsciiCasecmp(const char *s1, const char *s2, size_t n)
Definition dlib.c:215
void dStr_erase(Dstr *ds, int pos_0, int len)
Erase a substring.
Definition dlib.c:388
void dStr_free(Dstr *ds, int all)
Free a dillo string.
Definition dlib.c:337
char * dStriAsciiStr(const char *haystack, const char *needle)
Case insensitive strstr.
Definition dlib.c:184
void dStr_append_l(Dstr *ds, const char *s, int l)
Append a C string to a Dstr (providing length).
Definition dlib.c:308
void dStr_append_c(Dstr *ds, int c)
Append one character.
Definition dlib.c:349
char * dStrndup(const char *s, size_t sz)
Definition dlib.c:88
void dStr_sprintf(Dstr *ds, const char *format,...)
Printf-like function.
Definition dlib.c:450
void dStr_vsprintfa(Dstr *ds, const char *format, va_list argp)
vsprintf-like function that appends.
Definition dlib.c:401
Dstr * dStr_new(const char *s)
Create a new string.
Definition dlib.c:325
void dStr_truncate(Dstr *ds, int len)
Truncate a Dstr to be 'len' bytes long.
Definition dlib.c:368
char * dStrnfill(size_t len, char c)
Return a new string of length 'len' filled with 'c' characters.
Definition dlib.c:149
#define dReturn_if_fail(expr)
Definition dlib.h:72
#define dReturn_val_if_fail(expr, val)
Definition dlib.h:76
#define dReturn_if(expr)
Definition dlib.h:64
#define dReturn_val_if(expr, val)
Definition dlib.h:68
#define D_ASCII_TOLOWER(c)
Definition dlib.h:37
#define TRUE
Definition dlib.h:23
#define FALSE
Definition dlib.h:19
#define dNew(type, count)
Definition dlib.h:49
@ SKIP_VALUE
Definition dpip.c:70
static Image * image
#define IMAGE_MAX_AREA
Definition fltkimgbuf.cc:27
void a_History_set_title_by_url(const DilloUrl *url, const char *title)
Set the page-title for a given URL.
Definition history.c:130
static void Html_tag_open_video(DilloHtml *html, const char *tag, int tagsize)
Definition html.cc:2511
static void Html_tag_open_title(DilloHtml *html, const char *tag, int tagsize)
Handle open TITLE.
Definition html.cc:1688
const char * a_Html_get_attr(DilloHtml *html, const char *tag, int tagsize, const char *attrname)
Call Html_get_attr2 telling it to parse entities and strip the result.
Definition html.cc:4267
static misc::SimpleVector< int > * Html_read_coords(DilloHtml *html, const char *str)
Read coords in a string, returning a vector of ints.
Definition html.cc:2366
DilloHtmlTagParsingFlags
Definition html.cc:86
@ HTML_RightTrim
Definition html.cc:88
@ HTML_LeftTrim
Definition html.cc:87
@ HTML_ParseEntities
Definition html.cc:89
static int i_HR
Definition html.cc:131
static void Html_tag_open_a(DilloHtml *html, const char *tag, int tagsize)
Definition html.cc:2694
static void Html_css_load_callback(int Op, CacheClient_t *Client)
Called by the network engine when a stylesheet has new data.
Definition html.cc:3233
static void Html_real_pop_tag(DilloHtml *html)
Pop the top tag in the stack.
Definition html.cc:1375
static void Html_tag_open_blockquote(DilloHtml *html, const char *tag, int tagsize)
Definition html.cc:2773
static int i_OPTGROUP
Definition html.cc:133
static void Html_push_tag(DilloHtml *html, int tag_idx)
Push the tag (copying attributes from the top of the stack)
Definition html.cc:1349
static void Html_tag_open_dir(DilloHtml *html, const char *tag, int tagsize)
Definition html.cc:2841
static void Html_tag_close_a(DilloHtml *html)
Definition html.cc:2764
bool a_Html_tag_set_valign_attr(DilloHtml *html, const char *tag, int tagsize)
Evaluates the VALIGN attribute (top|bottom|middle|baseline) and sets the style in style_attrs.
Definition html.cc:363
static const TagInfo Tags[]
Function index for the open, content, and close functions for each tag.
Definition html.cc:3469
static int Html_triggers_optional_close(int old_idx, int cur_idx)
For elements with optional close, check whether is time to close, by also following Firefox's de fact...
Definition html.cc:3637
static void Html_tag_open_html(DilloHtml *html, const char *tag, int tagsize)
Handle open HTML element.
Definition html.cc:1606
static bool Html_must_add_breaks(DilloHtml *html)
Definition html.cc:424
static int i_TBODY
Definition html.cc:140
static void Html_tag_content_map(DilloHtml *html, const char *tag, int tagsize)
Definition html.cc:2315
static void Html_add_anchor(DilloHtml *html, const char *name)
Register an anchor for this page.
Definition html.cc:2675
static void Html_tag_open_span(DilloHtml *html, const char *tag, int tagsize)
Definition html.cc:3386
static void Html_free(void *data)
Definition html.cc:222
static void Html_tag_open_pre(DilloHtml *html, const char *tag, int tagsize)
Definition html.cc:3051
static void Html_tag_open_body(DilloHtml *html, const char *tag, int tagsize)
Definition html.cc:1783
static void Html_test_section(DilloHtml *html, int new_idx, int IsCloseTag)
HTML, HEAD and BODY elements have optional open and close tags.
Definition html.cc:3844
static int Html_set_new_link(DilloHtml *html, DilloUrl **url)
Create a new link, set it as the url's parent and return the index.
Definition html.cc:306
void a_Html_form_display_hiddens(void *v_html, void *v_form, bool_t display)
Used by the "Show/Hide hiddens" form menuitem.
Definition html.cc:279
static bool Html_contains_form(DilloHtml *html, void *v_form)
Search for form.
Definition html.cc:240
static void Html_parse_doctype(DilloHtml *html, const char *tag, int tagsize)
Handle DOCTYPE declaration.
Definition html.cc:1520
static void Html_tag_open_q(DilloHtml *html, const char *tag, int tagsize)
Definition html.cc:2782
DilloUrl * a_Html_url_new(DilloHtml *html, const char *url_str, const char *base_url, int use_base_url)
Wrapper for a_Url_new that adds an error detection message.
Definition html.cc:172
static void Html_tag_content_frame(DilloHtml *html, const char *tag, int tagsize)
Definition html.cc:1940
static int i_TH
Definition html.cc:141
static void Html_display_listitem(DilloHtml *html)
Definition html.cc:3986
static int Html_charref_comp(const void *a, const void *b)
Comparison function for binary search.
Definition html.cc:958
static void Html_tag_open_frame(DilloHtml *html, const char *tag, int tagsize)
Definition html.cc:1915
static const char * Html_parse_entity(DilloHtml *html, const char *token, int toksize, int *entsize, bool_t is_attr)
Given an entity, return the corresponding string.
Definition html.cc:1035
static void Html_tag_open_ul(DilloHtml *html, const char *tag, int tagsize)
Definition html.cc:2808
DilloHtmlTagParsingState
Definition html.cc:76
@ SKIP_VALUE
Definition html.cc:81
@ SEEK_TOKEN_START
Definition html.cc:79
@ MATCH_ATTR_NAME
Definition html.cc:78
@ SEEK_ATTR_START
Definition html.cc:77
@ SEEK_VALUE_START
Definition html.cc:80
@ FINISHED
Definition html.cc:83
@ GET_VALUE
Definition html.cc:82
static void Html_add_textblock(DilloHtml *html, bool addBreaks, int breakSpace, bool addBreakOpt)
Create and add a new Textblock to the current Textblock.
Definition html.cc:393
#define NTAGS
Definition html.cc:3582
static void Html_tag_open_h(DilloHtml *html, const char *tag, int tagsize)
Definition html.cc:1996
static void Html_process_word(DilloHtml *html, const char *word, int size)
Handles putting the word into its proper place.
Definition html.cc:1216
static void Html_tag_open_object(DilloHtml *html, const char *tag, int tagsize)
Definition html.cc:2474
static int i_BUTTON
Definition html.cc:127
void a_Html_tag_set_align_attr(DilloHtml *html, const char *tag, int tagsize)
Evaluates the ALIGN attribute (left|center|right|justify) and sets the style at the top of the stack.
Definition html.cc:318
static int i_DT
Definition html.cc:129
static void Html_tag_open_menu(DilloHtml *html, const char *tag, int tagsize)
Definition html.cc:2857
static int i_SELECT
Definition html.cc:136
static void Html_display_block(DilloHtml *html)
Definition html.cc:3975
static void Html_tag_close_li(DilloHtml *html)
Definition html.cc:2943
static void Html_tag_close_pre(DilloHtml *html)
Definition html.cc:3062
static void Html_tag_content_frameset(DilloHtml *html, const char *tag, int tagsize)
Definition html.cc:1985
static Charref_t * Html_charref_search(char *key)
Binary search of 'key' in charref list.
Definition html.cc:966
static void Html_tag_open_link(DilloHtml *html, const char *tag, int tagsize)
Parse the LINK element (Only CSS stylesheets by now).
Definition html.cc:3299
static void Html_tag_content_br(DilloHtml *html, const char *tag, int tagsize)
Definition html.cc:2008
static void Html_tag_close_media(DilloHtml *html)
Media (AUDIO/VIDEO) close function.
Definition html.cc:2614
static void Html_parse_common_attrs(DilloHtml *html, char *tag, int tagsize)
Parse attributes that can appear on any tag.
Definition html.cc:3898
static const char * Html_parse_numeric_charref(DilloHtml *html, char *tok, bool_t is_attr, int *entsize)
Parse a numeric character reference (e.g., "&#47;" or "&#x2F;").
Definition html.cc:876
static void Html_tag_content_object(DilloHtml *html, const char *tag, int tagsize)
Definition html.cc:2500
static void Html_tag_open_source(DilloHtml *html, const char *tag, int tagsize)
Definition html.cc:2576
static int i_A
Definition html.cc:125
static void Html_tag_close_q(DilloHtml *html)
Definition html.cc:2797
static void Html_tag_close_map(DilloHtml *html)
Handle close <MAP>.
Definition html.cc:2340
static void Html_eventually_pop_dw(DilloHtml *html, bool hand_over_break)
This function is called after popping the stack, to handle nested Textblock widgets.
Definition html.cc:1336
static void Html_tag_open_hr(DilloHtml *html, const char *tag, int tagsize)
Definition html.cc:2952
static CssLength Html_parse_length_or_multi_length(const char *attr, char **endptr)
Definition html.cc:1394
static void Html_process_space_pre_line(DilloHtml *html, const char *space, int spacesize)
For white-space: pre-line, we must break the line if encountering a newline.
Definition html.cc:1108
static void Html_tag_cleanup_at_close(DilloHtml *html, int TagIdx)
Conditional cleanup of the stack, called before closing any tag.
Definition html.cc:3797
static void Html_process_space(DilloHtml *html, const char *space, int spacesize)
Parse spaces.
Definition html.cc:1130
void a_Html_form_submit(void *v_html, void *v_form)
Used by the "Submit form" form menuitem.
Definition html.cc:253
static void Html_tag_open_dd(DilloHtml *html, const char *tag, int tagsize)
Definition html.cc:3043
static void Html_tag_content_hr(DilloHtml *html, const char *tag, int tagsize)
Definition html.cc:3010
static const char * Html_get_attr2(DilloHtml *html, const char *tag, int tagsize, const char *attrname, int tag_parsing_flags)
Get attribute value for 'attrname' and return it.
Definition html.cc:4164
static int i_TD
Definition html.cc:138
static int i_TR
Definition html.cc:139
static void Html_callback(int Op, CacheClient_t *Client)
Dispatch the appropriate function for 'Op'.
Definition html.cc:4300
static void Html_process_tag(DilloHtml *html, char *tag, int tagsize)
Process a tag, given as 'tag' and 'tagsize'.
Definition html.cc:4024
static void Html_tag_close_par(DilloHtml *html)
Default close for paragraph tags - pop the stack and break.
Definition html.cc:3428
void(* TagCloseFunct)(DilloHtml *html)
Definition html.cc:74
static void Html_set_link_coordinates(DilloHtml *html, int link, int x, int y)
Set the URL data for image maps.
Definition html.cc:292
static void Html_tag_content_img(DilloHtml *html, const char *tag, int tagsize)
Create a new Image struct and request the image-url to the cache.
Definition html.cc:2269
static int i_DD
Definition html.cc:128
static void Html_update_content_type(DilloHtml *html, const char *content)
Update the document's content type information based on meta tag data.
Definition html.cc:3094
void(* TagOpenFunct)(DilloHtml *html, const char *tag, int tagsize)
Definition html.cc:73
static void Html_tag_open_default(DilloHtml *html, const char *tag, int tagsize)
Definition html.cc:3378
static void Html_tag_open_embed(DilloHtml *html, const char *tag, int tagsize)
Definition html.cc:2623
static int Html_ms_stupid_quotes_2ucs(int codepoint)
This is M$ non-standard "smart quotes" (w1252).
Definition html.cc:856
#define TAB_SIZE
Definition html.cc:58
static void Html_tag_open_img(DilloHtml *html, const char *tag, int tagsize)
Definition html.cc:2206
void a_Html_load_images(void *v_html, DilloUrl *pattern)
Used by the "Load images" page menuitem.
Definition html.cc:230
static const char * Html_get_javascript_link(DilloHtml *html)
Test and extract the link from a javascript instruction.
Definition html.cc:2652
static void Html_tag_content_source(DilloHtml *html, const char *tag, int tagsize)
Definition html.cc:2604
static int Html_write_raw(DilloHtml *html, char *buf, int bufsize, int Eof)
Here's where we parse the html and put it into the Textblock structure.
Definition html.cc:4316
static int i_OPTION
Definition html.cc:134
static int Html_forbids_cross_nesting(const int InFlags, const int new_idx)
Check nesting and cross-nesting between BUTTON, SELECT, TEXTAREA and A.
Definition html.cc:3676
static void Html_tag_close_html(DilloHtml *html)
Handle close HTML element.
Definition html.cc:1625
static void Html_tag_close_title(DilloHtml *html)
Handle close TITLE.
Definition html.cc:1708
static void Html_tag_open_meta(DilloHtml *html, const char *tag, int tagsize)
Handle META. We do not support http-equiv=refresh with delay>0 because it's non standard,...
Definition html.cc:3120
static int i_LI
Definition html.cc:132
static void Html_tag_open_script(DilloHtml *html, const char *tag, int tagsize)
Handle open SCRIPT.
Definition html.cc:1726
static void Html_tag_open_abbr(DilloHtml *html, const char *tag, int tagsize)
Definition html.cc:2046
static void Html_tag_content_wbr(DilloHtml *html, const char *tag, int tagsize)
Definition html.cc:3436
static void Html_tag_open_sectioning(DilloHtml *html, const char *tag, int tagsize)
Definition html.cc:3403
static void Html_tag_open_dt(DilloHtml *html, const char *tag, int tagsize)
Definition html.cc:3034
static int i_BODY
Definition html.cc:126
void * a_Html_text(const char *type, void *P, CA_Callback_t *Call, void **Data)
Set callback function and callback data for the "html/text" MIME type.
Definition html.cc:211
static bool Html_load_image(BrowserWindow *bw, DilloUrl *url, const DilloUrl *requester, DilloImage *image)
Tell cache to retrieve image.
Definition html.cc:2188
static void Html_force_push_tag(DilloHtml *html, int tag_idx)
Push the tag (used to force an element with optional open into the stack).
Definition html.cc:1366
#define SGML_SPCDEL
Definition html.cc:56
static void Html_tag_open_style(DilloHtml *html, const char *tag, int tagsize)
Handle open STYLE.
Definition html.cc:1744
static const char * Html_parse_named_charref(DilloHtml *html, char *tok, bool_t is_attr, int *entsize)
Parse a named character reference (e.g., "&amp;" or "&hellip;").
Definition html.cc:979
static int Html_check_name_val(DilloHtml *html, const char *val, const char *attrname)
Check that 'val' is composed of characters inside [A-Za-z0-9:_.
Definition html.cc:1476
int a_Html_tag_index(const char *tag)
Get 'tag' index.
Definition html.cc:3610
static void Html_tag_open_base(DilloHtml *html, const char *tag, int tagsize)
Set the Document Base URI.
Definition html.cc:3349
static void Html_tag_content_area(DilloHtml *html, const char *tag, int tagsize)
Definition html.cc:2396
static bool Html_match_tag(const char *tagstr, char *tag, int tagsize)
Does the tag in tagstr (e.g.
Definition html.cc:1318
static void Html_tag_close_body(DilloHtml *html)
Definition html.cc:1889
static void Html_tag_open_audio(DilloHtml *html, const char *tag, int tagsize)
Definition html.cc:2544
static void Html_tag_open_font(DilloHtml *html, const char *tag, int tagsize)
Definition html.cc:2016
static void Html_tag_close_head(DilloHtml *html)
Handle close HEAD element.
Definition html.cc:1662
static int i_TEXTAREA
Definition html.cc:137
static void Html_tag_cleanup_to_idx(DilloHtml *html, int s_idx, int new_idx, int fi, char op)
Cleanup the stack to a given index.
Definition html.cc:3695
static void Html_tag_content_embed(DilloHtml *html, const char *tag, int tagsize)
Definition html.cc:2643
static void Html_tag_close_script(DilloHtml *html)
Handle close SCRIPT.
Definition html.cc:1735
static void Html_display_inline_block(DilloHtml *html)
Definition html.cc:3981
static void Html_stack_cleanup_at_open(DilloHtml *html, int ni)
Conditional cleanup of the stack (at open time).
Definition html.cc:3747
static int i_HTML
Definition html.cc:130
static int Html_tag_pre_excludes(DilloHtml *html, int tag_idx)
Check whether a tag is in the "excluding" element set for PRE.
Definition html.cc:3071
static void Html_tag_open_head(DilloHtml *html, const char *tag, int tagsize)
Handle open HEAD element.
Definition html.cc:1637
static void Html_tag_open_li(DilloHtml *html, const char *tag, int tagsize)
Definition html.cc:2911
static int i_P
Definition html.cc:135
static int Html_tag_compare(const char *p1, const char *p2)
Definition html.cc:3595
static void Html_check_html5_obsolete(DilloHtml *html, int ni)
Warn when encountering elements that are obsolete in HTML5.
Definition html.cc:3952
static void Html_tag_open_p(DilloHtml *html, const char *tag, int tagsize)
Definition html.cc:1903
static void Html_tag_close_style(DilloHtml *html)
Handle close STYLE.
Definition html.cc:1773
static void Html_tag_open_ol(DilloHtml *html, const char *tag, int tagsize)
Definition html.cc:2874
void a_Html_form_reset(void *v_html, void *v_form)
Used by the "Reset form" form menuitem.
Definition html.cc:266
static void Html_tag_open_dl(DilloHtml *html, const char *tag, int tagsize)
Definition html.cc:3024
static void Html_tag_open_div(DilloHtml *html, const char *tag, int tagsize)
Definition html.cc:3419
#define HTML_NTAGS
Definition html.hh:24
#define NumRef
static const Charref_t Charrefs[NumRef]
const char * a_Html_get_attr(DilloHtml *html, const char *tag, int tagsize, const char *attrname)
Call Html_get_attr2 telling it to parse entities and strip the result.
Definition html.cc:4267
void a_Html_stash_init(DilloHtml *html)
Initialize the stash buffer.
Definition html.cc:843
@ DILLO_HTML_TABLE_MODE_NONE
no table at all
DilloUrl * a_Html_url_new(DilloHtml *html, const char *url_str, const char *base_url, int use_base_url)
Wrapper for a_Url_new that adds an error detection message.
Definition html.cc:172
#define HT2TB(html)
"html struct" to Textblock
DilloImage * a_Html_image_new(DilloHtml *html, const char *tag, int tagsize)
Definition html.cc:2133
CssLength a_Html_parse_length(DilloHtml *html, const char *attr)
Returns a length or a percentage, or UNDEF_LENGTH in case of an error, or if attr is NULL.
Definition html.cc:1433
int32_t a_Html_color_parse(DilloHtml *html, const char *str, int32_t default_color)
Parse a color attribute.
Definition html.cc:1458
void a_Html_tag_set_align_attr(DilloHtml *html, const char *tag, int tagsize)
Evaluates the ALIGN attribute (left|center|right|justify) and sets the style at the top of the stack.
Definition html.cc:318
DilloHtmlParseMode
@ DILLO_HTML_PARSE_MODE_PRE
@ DILLO_HTML_PARSE_MODE_VERBATIM
@ DILLO_HTML_PARSE_MODE_BODY
@ DILLO_HTML_PARSE_MODE_INIT
@ DILLO_HTML_PARSE_MODE_STASH_AND_BODY
@ DILLO_HTML_PARSE_MODE_STASH
#define BUG_MSG(...)
Add a bug-meter message.
void a_Html_common_image_attrs(DilloHtml *html, const char *tag, int tagsize)
Read image-associated tag attributes and create new image.
Definition html.cc:2063
#define S_TOP(html)
Top of the parsing stack.
@ IN_HEAD
@ IN_BUTTON
@ IN_LI
@ IN_MEDIA
@ IN_PRE
@ IN_BODY
@ IN_MAP
@ IN_TEXTAREA
@ IN_NONE
@ IN_META_HACK
@ IN_SELECT
@ IN_A
@ IN_HTML
@ IN_EOF
@ HTML_LIST_ORDERED
@ HTML_LIST_UNORDERED
@ HTML_LIST_NONE
#define HT2LT(html)
"html struct" to "Layout"
@ DILLO_HTML_TABLE_BORDER_SEPARATE
char * a_Html_get_attr_wdef(DilloHtml *html, const char *tag, int tagsize, const char *attrname, const char *def)
"a_Html_get_attr with default" Call a_Html_get_attr() and dStrdup() the returned string.
Definition html.cc:4281
int a_Html_tag_index(const char *tag)
Get 'tag' index.
Definition html.cc:3610
@ DT_UNRECOGNIZED
@ DT_NONE
@ DT_XHTML
@ DT_HTML
char * a_Html_parse_entities(DilloHtml *html, const char *token, int toksize)
Parse all the entities in a token.
Definition html.cc:1070
void a_Html_load_stylesheet(DilloHtml *html, DilloUrl *url)
Tell cache to retrieve a stylesheet.
Definition html.cc:3247
int a_Misc_content_type_cmp(const char *ct1, const char *ct2)
Compare two Content-Type strings.
Definition misc.c:274
void a_Misc_parse_content_type(const char *type, char **major, char **minor, char **charset)
Parse Content-Type string, e.g., "text/html; charset=utf-8".
Definition misc.c:210
#define d_isascii(c)
Definition misc.h:11
Anything related to Dillo Widget styles is defined here.
Definition style.cc:34
@ LIST_STYLE_TYPE_DISC
Definition style.hh:303
@ LIST_STYLE_TYPE_UPPER_ALPHA
Definition style.hh:313
@ LIST_STYLE_TYPE_LOWER_ALPHA
Definition style.hh:311
@ LIST_STYLE_TYPE_CIRCLE
Definition style.hh:304
@ LIST_STYLE_TYPE_UPPER_ROMAN
Definition style.hh:309
@ LIST_STYLE_TYPE_DECIMAL
Definition style.hh:306
@ LIST_STYLE_TYPE_SQUARE
Definition style.hh:305
@ LIST_STYLE_TYPE_NONE
Definition style.hh:323
@ LIST_STYLE_TYPE_LOWER_ROMAN
Definition style.hh:308
@ DISPLAY_INLINE_BLOCK
Definition style.hh:281
int Length
Type for representing all lengths within dw::core::style.
Definition style.hh:429
void numtostr(int num, char *buf, int buflen, ListStyleType listStyleType)
Convert a number into a string, in a given list style.
Definition style.cc:1422
@ WHITE_SPACE_PRE_LINE
Definition style.hh:364
@ WHITE_SPACE_PRE_WRAP
Definition style.hh:363
Anything related to embedded UI widgets is defined here.
Definition core.hh:36
The core of Dw is defined in this namespace.
Definition core.hh:23
@ SHIFT_MASK
Definition events.hh:17
Dw is in this namespace, or sub namespaces of this one.
Definition form.cc:25
DilloPrefs prefs
Global Data.
Definition prefs.c:33
void Html_tag_close_form(DilloHtml *html)
Definition form.cc:402
void a_Html_form_display_hiddens2(void *vform, bool display)
Definition form.cc:264
void Html_tag_open_textarea(DilloHtml *html, const char *tag, int tagsize)
Definition form.cc:626
void Html_tag_content_textarea(DilloHtml *html, const char *tag, int tagsize)
The textarea tag.
Definition form.cc:636
void Html_tag_close_button(DilloHtml *html)
Handle close <BUTTON>
Definition form.cc:974
void Html_tag_open_option(DilloHtml *html, const char *tag, int tagsize)
Definition form.cc:868
DilloHtmlForm * a_Html_form_new(DilloHtml *html, DilloHtmlMethod method, const DilloUrl *action, DilloHtmlEnc content_type, const char *charset, bool enabled)
Definition form.cc:235
void Html_tag_open_optgroup(DilloHtml *html, const char *tag, int tagsize)
Definition form.cc:808
void a_Html_form_reset2(void *vform)
Definition form.cc:259
void a_Html_form_delete(DilloHtmlForm *form)
Definition form.cc:244
void a_Html_input_delete(DilloHtmlInput *input)
Definition form.cc:249
void Html_tag_close_select(DilloHtml *html)
Definition form.cc:788
void Html_tag_open_isindex(DilloHtml *html, const char *tag, int tagsize)
The ISINDEX tag is just a deprecated form of <INPUT type=text> with implied FORM,...
Definition form.cc:589
void Html_tag_close_optgroup(DilloHtml *html)
Definition form.cc:844
void Html_tag_close_textarea(DilloHtml *html)
Close textarea.
Definition form.cc:695
void Html_tag_open_select(DilloHtml *html, const char *tag, int tagsize)
Definition form.cc:735
void Html_tag_open_input(DilloHtml *html, const char *tag, int tagsize)
Add a new input to current form.
Definition form.cc:432
void Html_tag_open_button(DilloHtml *html, const char *tag, int tagsize)
Definition form.cc:907
void Html_tag_open_form(DilloHtml *html, const char *tag, int tagsize)
Handle <FORM> tag.
Definition form.cc:338
void a_Html_form_submit2(void *vform)
Definition form.cc:254
void Html_tag_close_option(DilloHtml *html)
Definition form.cc:896
DilloHtmlMethod
Definition form.hh:10
DilloHtmlEnc
Definition form.hh:16
void a_Image_ref(DilloImage *Image)
Add a reference to an Image struct. Do nothing if the argument is NULL.
Definition image.cc:100
void a_Image_unref(DilloImage *Image)
Unref and free if necessary. Do nothing if the argument is NULL.
Definition image.cc:89
DilloImage * a_Image_new(void *layout, void *img_rndr, int32_t bg_color, int32_t fg_color)
Create and initialize a new image structure.
Definition image.cc:34
void Html_tag_open_td(DilloHtml *html, const char *tag, int tagsize)
Definition table.cc:242
void Html_tag_content_tbody(DilloHtml *html, const char *tag, int tagsize)
Definition table.cc:260
void Html_tag_open_tbody(DilloHtml *html, const char *tag, int tagsize)
Definition table.cc:256
void Html_tag_open_thead(DilloHtml *html, const char *tag, int tagsize)
Definition table.cc:274
void Html_tag_content_th(DilloHtml *html, const char *tag, int tagsize)
Definition table.cc:287
void Html_tag_open_table(DilloHtml *html, const char *tag, int tagsize)
Definition table.cc:47
void Html_tag_open_tfoot(DilloHtml *html, const char *tag, int tagsize)
Definition table.cc:267
void Html_tag_content_td(DilloHtml *html, const char *tag, int tagsize)
Definition table.cc:248
void Html_tag_content_tr(DilloHtml *html, const char *tag, int tagsize)
Definition table.cc:223
void Html_tag_open_th(DilloHtml *html, const char *tag, int tagsize)
Definition table.cc:281
void Html_tag_content_table(DilloHtml *html, const char *tag, int tagsize)
Definition table.cc:161
void Html_tag_open_tr(DilloHtml *html, const char *tag, int tagsize)
Definition table.cc:180
Contains the specific data for a single window.
Definition bw.h:27
Dstr * page_bugs
Definition bw.h:71
float zoom
Definition bw.h:74
int num_page_bugs
HTML-bugs detected at parse time.
Definition bw.h:70
int NumPendingStyleSheets
Number of not yet arrived style sheets.
Definition bw.h:48
Data structure for cache clients.
Definition cache.h:48
int Key
Primary Key for this client.
Definition cache.h:49
void * CbData
Client function data.
Definition cache.h:55
uint_t BufSize
Valid size of cache-data.
Definition cache.h:53
void * Buf
Pointer to cache-data.
Definition cache.h:52
void * Web
Pointer to the Web structure of our client.
Definition cache.h:56
const char * html4_str
const char * ref
const char * html5_str
DilloImage * image
DilloUrl * url
DilloHtmlTableMode table_mode
bool hand_over_break
This is used for list items etc.
DilloHtmlTableBorderMode table_border_mode
dw::core::Widget * table
dw::core::Widget * textblock
DilloHtmlListMode list_type
dw::core::Widget * ref_list_item
This is used to align list items (especially in enumerated lists)
DilloHtmlParseMode parse_mode
int tag_idx
TagInfo index for the tag that's being processed.
bool_t parse_embedded_css
Definition prefs.h:103
bool_t load_images
Definition prefs.h:99
bool_t show_tooltip
Definition prefs.h:66
bool_t limit_text_width
Definition prefs.h:71
bool_t focus_new_tab
Definition prefs.h:74
bool_t show_extra_warnings
Definition prefs.h:122
bool_t load_stylesheets
Definition prefs.h:102
bool_t contrast_visited_color
Definition prefs.h:65
bool_t middle_click_opens_new_tab
Definition prefs.h:114
Definition url.h:88
Definition dlib.h:102
Dstr_char_t * str
Definition dlib.h:105
int len
Definition dlib.h:104
void * img_rndr
Definition image.hh:61
int flags
Additional info.
Definition web.hh:29
DilloUrl * url
Requested URL.
Definition web.hh:25
BrowserWindow * bw
The requesting browser window [reference].
Definition web.hh:28
DilloImage * Image
For image urls [reference].
Definition web.hh:31
void a_UIcmd_link_popup(void *vbw, const DilloUrl *url)
Definition uicmd.cc:1272
void a_UIcmd_set_bug_prog(BrowserWindow *bw, int n_bug)
Definition uicmd.cc:1531
void a_UIcmd_page_popup(void *vbw, bool_t has_bugs, void *v_cssUrls)
Definition uicmd.cc:1262
void a_UIcmd_set_msg(BrowserWindow *bw, const char *format,...)
Definition uicmd.cc:1563
void a_UIcmd_image_popup(void *vbw, const DilloUrl *url, bool_t loaded_img, DilloUrl *page_url, DilloUrl *link_url)
Definition uicmd.cc:1280
void a_UIcmd_set_page_title(BrowserWindow *bw, const char *label)
Definition uicmd.cc:1540
void a_UIcmd_open_url_nw(BrowserWindow *bw, const DilloUrl *url)
Definition uicmd.cc:836
void a_UIcmd_repush(void *vbw)
Definition uicmd.cc:926
void a_UIcmd_open_url(BrowserWindow *bw, const DilloUrl *url)
Definition uicmd.cc:801
void a_UIcmd_redirection0(void *vbw, const DilloUrl *url)
Definition uicmd.cc:934
void a_UIcmd_open_url_nt(void *vbw, const DilloUrl *url, int focus)
Definition uicmd.cc:852
void a_Url_set_flags(DilloUrl *u, int flags)
Set DilloUrl flags.
Definition url.c:527
int a_Url_cmp(const DilloUrl *A, const DilloUrl *B)
Compare two Url's to check if they're the same, or which one is bigger.
Definition url.c:506
void a_Url_free(DilloUrl *url)
Free a DilloUrl.
Definition url.c:208
void a_Url_set_ismap_coords(DilloUrl *u, char *coord_str)
Set DilloUrl ismap coordinates.
Definition url.c:549
DilloUrl * a_Url_new(const char *url_str, const char *base_url)
Transform (and resolve) a URL string into the respective DilloUrl.
Definition url.c:371
char * a_Url_str(const DilloUrl *u)
Return the url as a string.
Definition url.c:66
DilloUrl * a_Url_dup(const DilloUrl *ori)
Duplicate a Url structure.
Definition url.c:477
#define URL_ILLEGAL_CHARS_SPC(u)
Definition url.h:81
#define URL_SCHEME_(u)
Definition url.h:49
#define URL_ILLEGAL_CHARS(u)
Definition url.h:80
#define URL_SpamSafe
Definition url.h:40
#define URL_FLAGS(u)
Definition url.h:79
#define URL_STR(u)
Definition url.h:76
#define URL_STR_(u)
Definition url.h:55
#define URL_SCHEME(u)
Definition url.h:70
bool_t a_Utf8_ideographic(const char *s, const char *end, int *len)
Does s point to a UTF-8-encoded ideographic character?
Definition utf8.cc:76
int a_Utf8_encode(unsigned int ucs, char *buf)
Write UTF-8 encoding of ucs into buf and return number of bytes written.
Definition utf8.cc:54
static const char utf8_zero_width_space[]
Unicode zero width space U+200B.
Definition utf8.hh:19
DilloWeb * a_Web_new(BrowserWindow *bw, const DilloUrl *url, const DilloUrl *requester)
Allocate and set safe values for a DilloWeb structure.
Definition web.cc:121
#define WEB_Stylesheet
Definition web.hh:18
#define WEB_Image
Definition web.hh:17