Dillo v3.1.1-14-g8f67d6e0
Loading...
Searching...
No Matches
html.cc
Go to the documentation of this file.
1/*
2 * File: html.cc
3 *
4 * Copyright (C) 2005-2007 Jorge Arellano Cid <jcid@dillo.org>
5 * Copyright (C) 2024 Rodrigo Arias Mallo <rodarima@gmail.com>
6 *
7 * This program is free software; you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation; either version 3 of the License, or
10 * (at your option) any later version.
11 */
12
17/*-----------------------------------------------------------------------------
18 * Includes
19 *---------------------------------------------------------------------------*/
20#include <ctype.h> /* for isspace */
21#include <string.h> /* for memcpy and memmove */
22#include <stdlib.h>
23#include <stdio.h> /* for sprintf */
24#include <errno.h>
25
26#include "bw.h" /* for BrowserWindow */
27#include "msg.h"
28#include "binaryconst.h"
29#include "colors.h"
30#include "html_charrefs.h"
31#include "utf8.hh"
32
33#include "misc.h"
34#include "uicmd.hh"
35#include "history.h"
36#include "menu.hh"
37#include "prefs.h"
38#include "capi.h"
39#include "html.hh"
40#include "html_common.hh"
41#include "form.hh"
42#include "table.hh"
43
44#include "dw/textblock.hh"
45#include "dw/bullet.hh"
46#include "dw/listitem.hh"
47#include "dw/image.hh"
48#include "dw/ruler.hh"
49
50/*-----------------------------------------------------------------------------
51 * Defines
52 *---------------------------------------------------------------------------*/
53
54/* Define to 1 to ignore white space immediately after an open tag,
55 * and immediately before a close tag. */
56#define SGML_SPCDEL 0
57
58#define TAB_SIZE 8
59
60/*-----------------------------------------------------------------------------
61 * Name spaces
62 *---------------------------------------------------------------------------*/
63using namespace lout;
64using namespace dw;
65using namespace dw::core;
66using namespace dw::core::ui;
67using namespace dw::core::style;
68
69/*-----------------------------------------------------------------------------
70 * Typedefs
71 *---------------------------------------------------------------------------*/
72class DilloHtml;
73typedef void (*TagOpenFunct) (DilloHtml *html, const char *tag, int tagsize);
74typedef void (*TagCloseFunct) (DilloHtml *html);
75
85
91
92
93/*
94 * Exported function with C linkage.
95 */
96extern "C" {
97void *a_Html_text(const char *type, void *P, CA_Callback_t *Call,void **Data);
98}
99
100/*-----------------------------------------------------------------------------
101 * Forward declarations
102 *---------------------------------------------------------------------------*/
103static int Html_write_raw(DilloHtml *html, char *buf, int bufsize, int Eof);
104static bool Html_load_image(BrowserWindow *bw, DilloUrl *url,
105 const DilloUrl *requester, DilloImage *image);
106static void Html_callback(int Op, CacheClient_t *Client);
107static void Html_tag_cleanup_at_close(DilloHtml *html, int TagIdx);
108int a_Html_tag_index(const char *tag);
109
110/*-----------------------------------------------------------------------------
111 * Local Data
112 *---------------------------------------------------------------------------*/
114typedef struct {
115 const char *name; /* element name */
116 unsigned char Flags; /* flags (explained near the table data) */
117 char EndTag; /* Is it Required, Optional or Forbidden */
118 TagOpenFunct open; /* Open function */
119 TagOpenFunct content; /* Content function */
120 TagCloseFunct close; /* Close function */
121} TagInfo;
122
123/* Some element indexes required in scattered places */
124static int
141
142
143/*-----------------------------------------------------------------------------
144 *-----------------------------------------------------------------------------
145 * Main Code
146 *-----------------------------------------------------------------------------
147 *---------------------------------------------------------------------------*/
148
152void DilloHtml::bugMessage(const char *format, ... )
153{
154 va_list argp;
155
156 if (bw->num_page_bugs)
157 dStr_append_c(bw->page_bugs, '\n');
159 "HTML warning: line %d, ",
161 va_start(argp, format);
162 dStr_vsprintfa(bw->page_bugs, format, argp);
163 va_end(argp);
165}
166
172 const char *url_str, const char *base_url,
173 int use_base_url)
174{
175 if (!url_str) {
176 MSG("a_Html_url_new: URL is NULL\n");
177 return NULL;
178 }
179
180 DilloUrl *url = a_Url_new(url_str,
181 (use_base_url) ? base_url : URL_STR_(html->base_url));
182
183 if (!url) {
184 BUG_MSG("URL is not valid '%s'.", url_str);
185 return NULL;
186 }
187
188 int n_ic, n_ic_spc;
189 if ((n_ic = URL_ILLEGAL_CHARS(url)) != 0) {
190 const char *suffix = (n_ic) > 1 ? "s" : "";
191 n_ic_spc = URL_ILLEGAL_CHARS_SPC(url);
192 if (n_ic == n_ic_spc) {
193 BUG_MSG("URL has %d illegal space%s ('%s').", n_ic, suffix, url_str);
194 } else if (n_ic_spc == 0) {
195 BUG_MSG("URL has %d illegal byte%s in {00-1F, 7F-FF} range ('%s').",
196 n_ic, suffix, url_str);
197 } else {
198 BUG_MSG("URL has %d illegal byte%s: "
199 "%d space%s and %d in {00-1F, 7F-FF} range ('%s').",
200 n_ic, suffix,
201 n_ic_spc, n_ic_spc > 1 ? "s" : "", n_ic-n_ic_spc, url_str);
202 }
203 }
204 return url;
205}
206
210void *a_Html_text(const char *Type, void *P, CA_Callback_t *Call, void **Data)
211{
212 DilloWeb *web = (DilloWeb*)P;
213 DilloHtml *html = new DilloHtml(web->bw, web->url, Type);
214
215 *Data = (void*)html;
217
218 return (void*)html->dw;
219}
220
221static void Html_free(void *data)
222{
223 delete ((DilloHtml*)data);
224}
225
229void a_Html_load_images(void *v_html, DilloUrl *pattern)
230{
231 DilloHtml *html = (DilloHtml*)v_html;
232
233 html->loadImages(pattern);
234}
235
239static bool Html_contains_form(DilloHtml *html, void *v_form)
240{
241 for (int i = 0; i < html->forms->size(); i++) {
242 if (html->forms->get(i) == v_form) {
243 return true;
244 }
245 }
246 return false;
247}
248
252void a_Html_form_submit(void *v_html, void *v_form)
253{
254 DilloHtml *html = (DilloHtml*)v_html;
255
256 if (Html_contains_form(html, v_form)) {
257 /* it's still valid */
258 a_Html_form_submit2(v_form);
259 }
260}
261
265void a_Html_form_reset(void *v_html, void *v_form)
266{
267 DilloHtml *html = (DilloHtml*)v_html;
268
269 if (Html_contains_form(html, v_form)) {
270 /* it's still valid */
271 a_Html_form_reset2(v_form);
272 }
273}
274
278void a_Html_form_display_hiddens(void *v_html, void *v_form, bool_t display)
279{
280 DilloHtml *html = (DilloHtml*)v_html;
281
282 if (Html_contains_form(html, v_form)) {
283 /* it's still valid */
284 a_Html_form_display_hiddens2(v_form, (display != 0));
285 }
286}
287
291static void Html_set_link_coordinates(DilloHtml *html, int link, int x, int y)
292{
293 char data[64];
294
295 if (x != -1) {
296 snprintf(data, 64, "?%d,%d", x, y);
297 a_Url_set_ismap_coords(html->links->get(link), data);
298 }
299}
300
305static int Html_set_new_link(DilloHtml *html, DilloUrl **url)
306{
307 int nl = html->links->size();
308 html->links->increase();
309 html->links->set(nl, (*url) ? *url : NULL);
310 return nl;
311}
312
317void a_Html_tag_set_align_attr(DilloHtml *html, const char *tag, int tagsize)
318{
319 const char *align;
320
321 if ((align = a_Html_get_attr(html, tag, tagsize, "align"))) {
322 TextAlignType textAlignType = TEXT_ALIGN_LEFT;
323
324 if (html->DocType == DT_HTML && html->DocTypeVersion >= 5.0f)
325 BUG_MSG("The align attribute is obsolete in HTML5.");
326
327 if (dStrAsciiCasecmp (align, "left") == 0)
328 textAlignType = TEXT_ALIGN_LEFT;
329 else if (dStrAsciiCasecmp (align, "right") == 0)
330 textAlignType = TEXT_ALIGN_RIGHT;
331 else if (dStrAsciiCasecmp (align, "center") == 0)
332 textAlignType = TEXT_ALIGN_CENTER;
333 else if (dStrAsciiCasecmp (align, "justify") == 0)
334 textAlignType = TEXT_ALIGN_JUSTIFY;
335#if 0
336 else if (dStrAsciiCasecmp (align, "char") == 0) {
337 /* TODO: Actually not supported for <p> etc. */
338 v.textAlign = TEXT_ALIGN_STRING;
339 if ((charattr = a_Html_get_attr(html, tag, tagsize, "char"))) {
340 if (charattr[0] == 0)
341 /* TODO: ALIGN=" ", and even ALIGN="&32;" will result in
342 * an empty string (don't know whether the latter is
343 * correct, has to be clarified with the specs), so
344 * that for empty strings, " " is assumed. */
345 style_attrs.textAlignChar = ' ';
346 else
347 style_attrs.textAlignChar = charattr[0];
348 } else
349 /* TODO: Examine LANG attr of <html>. */
350 style_attrs.textAlignChar = '.';
351 }
352#endif
354 textAlignType);
355 }
356}
357
362bool a_Html_tag_set_valign_attr(DilloHtml *html, const char *tag, int tagsize)
363{
364 const char *attr;
365 VAlignType valign;
366
367 if ((attr = a_Html_get_attr(html, tag, tagsize, "valign"))) {
368 if (html->DocType == DT_HTML && html->DocTypeVersion >= 5.0f)
369 BUG_MSG("The valign attribute is obsolete in HTML5.");
370
371 if (dStrAsciiCasecmp (attr, "top") == 0)
372 valign = VALIGN_TOP;
373 else if (dStrAsciiCasecmp (attr, "bottom") == 0)
374 valign = VALIGN_BOTTOM;
375 else if (dStrAsciiCasecmp (attr, "baseline") == 0)
376 valign = VALIGN_BASELINE;
377 else
378 valign = VALIGN_MIDDLE;
379
381 CSS_TYPE_ENUM, valign);
382 return true;
383 } else
384 return false;
385}
386
387
392static void Html_add_textblock(DilloHtml *html, bool addBreaks, int breakSpace,
393 bool addBreakOpt)
394{
395 Textblock *textblock = new Textblock (prefs.limit_text_width);
396 Style *style;
397
398 if (addBreaks) {
399 StyleAttrs attrs = *(html->style ());
400 attrs.display = DISPLAY_BLOCK;
401 style = Style::create (&attrs);
402 } else {
403 style = html->style ();
404 style->ref ();
405 }
406
407 if (addBreaks)
408 HT2TB(html)->addParbreak (breakSpace, html->wordStyle ());
409
410 HT2TB(html)->addWidget (textblock, style); /* Works also for floats etc. */
411 if (addBreakOpt)
412 HT2TB(html)->addBreakOption (html->style (), false);
413
414 if (addBreaks)
415 HT2TB(html)->addParbreak (breakSpace, html->wordStyle ());
416 S_TOP(html)->textblock = html->dw = textblock;
417 if (addBreaks)
418 S_TOP(html)->hand_over_break = true;
419
420 style->unref ();
421}
422
424{
425 return HT2TB(html)->mustAddBreaks (html->style ());
426}
427
432 const char *content_type)
433{
434 /* Init main variables */
435 bw = p_bw;
436 page_url = a_Url_dup(url);
437 base_url = a_Url_dup(url);
438 dw = NULL;
439
440 /* Init event receiver */
441 linkReceiver.html = this;
442 HT2LT(this)->connectLink (&linkReceiver);
443
444 a_Bw_add_doc(p_bw, this);
445
446 /* Init for-parsing variables */
447 Start_Buf = NULL;
448 Start_Ofs = 0;
449
450 _MSG("DilloHtml(): content type: %s\n", content_type);
451 this->content_type = dStrdup(content_type);
452
453 /* get charset */
455
456 stop_parser = false;
457
458 CurrOfs = OldOfs = 0;
459 OldLine = 1;
460
461 DocType = DT_NONE; /* assume Tag Soup 0.0! :-) */
462 DocTypeVersion = 0.0f;
463
465
466 cssUrls = new misc::SimpleVector <DilloUrl*> (1);
467
468 stack = new misc::SimpleVector <DilloHtmlState> (16);
469 stack->increase();
473 stack->getRef(0)->cell_text_align_set = false;
474 stack->getRef(0)->display_none = false;
476 stack->getRef(0)->list_number = 0;
477 stack->getRef(0)->tag_idx = -1; /* MUST not be used */
478 stack->getRef(0)->textblock = NULL;
479 stack->getRef(0)->table = NULL;
480 stack->getRef(0)->ref_list_item = NULL;
481 stack->getRef(0)->hand_over_break = false;
482
484
485 Stash = dStr_new("");
486 StashSpace = false;
487
488 pre_column = 0;
489 PreFirstChar = false;
490 PrevWasCR = false;
491 InVisitedLink = false;
492 ReqTagClose = false;
493 TagSoup = true;
494 loadCssFromStash = false;
495 PrevWasBodyClose = false;
496 PrevWasHtmlClose = false;
497
499
501
504 visited_color = -1;
505
506 /* Init page-handling variables */
507 forms = new misc::SimpleVector <DilloHtmlForm*> (1);
508 inputs_outside_form = new misc::SimpleVector <DilloHtmlInput*> (1);
509 links = new misc::SimpleVector <DilloUrl*> (64);
510 images = new misc::SimpleVector <DilloHtmlImage*> (16);
511
512 /* Initialize the main widget */
513 initDw();
514 /* Hook destructor to the dw delete call */
515 dw->setDeleteCallback(Html_free, this);
516}
517
522{
523 dReturn_if_fail (dw == NULL);
524
525 /* Create the main widget */
527
528 bw->num_page_bugs = 0;
530}
531
536{
537 _MSG("::~DilloHtml(this=%p)\n", this);
538
540
541 a_Bw_remove_doc(bw, this);
542
545
546 for (int i = 0; i < cssUrls->size(); i++)
548 delete (cssUrls);
549
550 for (int i = 0; i < forms->size(); i++)
552 delete(forms);
553
554 for (int i = 0; i < inputs_outside_form->size(); i++)
556 delete(inputs_outside_form);
557
558 for (int i = 0; i < links->size(); i++)
559 a_Url_free(links->get(i));
560 delete (links);
561
562 for (int i = 0; i < images->size(); i++) {
563 DilloHtmlImage *img = images->get(i);
564 a_Url_free(img->url);
565 a_Image_unref(img->image);
566 dFree(img);
567 }
568 delete (images);
569
570 delete styleEngine;
571}
572
577void DilloHtml::write(char *Buf, int BufSize, int Eof)
578{
579 int token_start;
580 char *buf = Buf + Start_Ofs;
581 int bufsize = BufSize - Start_Ofs;
582
583 _MSG("DilloHtml::write BufSize=%d Start_Ofs=%d\n", BufSize, Start_Ofs);
584#if 0
585 char *aux = dStrndup(Buf, BufSize);
586 MSG(" {%s}\n", aux);
587 dFree(aux);
588#endif
589
590 /* Update Start_Buf. It may be used after the parser is stopped */
591 Start_Buf = Buf;
592
593 dReturn_if (dw == NULL);
594 dReturn_if (stop_parser == true);
595
596 token_start = Html_write_raw(this, buf, bufsize, Eof);
597 Start_Ofs += token_start;
598}
599
605{
606 int i, ofs, line;
607 const char *p = Start_Buf;
608
609 dReturn_val_if_fail(p != NULL, -1);
610 /* Disable line counting for META hack. Buffers differ. */
612
613 ofs = CurrOfs;
614 line = OldLine;
615 for (i = OldOfs; i < ofs; ++i)
616 if (p[i] == '\n' || (p[i] == '\r' && p[i+1] != '\n'))
617 ++line;
618 OldOfs = CurrOfs;
619 OldLine = line;
620 return line;
621}
622
627{
628 delete(stack);
629
633 dFree(charset);
634}
635
640void DilloHtml::finishParsing(int ClientKey)
641{
642 int si;
643
644 dReturn_if (stop_parser == true);
645
646 /* flag we've already parsed up to the last byte */
647 InFlags |= IN_EOF;
648
649 /* force the close of elements left open (TODO: not for XHTML) */
650 while ((si = stack->size() - 1)) {
651 if (stack->getRef(si)->tag_idx != -1) {
653 }
654 }
655
656 /* Nothing left to do with the parser. Clear all flags, except EOF. */
657 InFlags = IN_EOF;
658
659 /* Remove this client from our active list */
660 a_Bw_close_client(bw, ClientKey);
661}
662
667 DilloHtmlEnc enc, const char *charset)
668{
669 // avoid data loss on repush after CSS stylesheets have been loaded
670 bool enabled = bw->NumPendingStyleSheets == 0;
671 DilloHtmlForm *form = a_Html_form_new (this, method, action,
672 enc, charset, enabled);
673 int nf = forms->size ();
674 forms->increase ();
675 forms->set (nf, form);
676 _MSG("Html formNew: action=%s nform=%d\n", action, nf);
677 return forms->size();
678}
679
684{
685 return forms->get (forms->size() - 1);
686}
687
689{
690 for (int i = 0; i < images->size(); i++) {
691 if (images->get(i)->image != NULL) {
692 return TRUE;
693 }
694 }
695 return FALSE;
696}
697
701void DilloHtml::loadImages (const DilloUrl *pattern)
702{
704
705 /* If the user asked for a specific image, the user (NULL) is the requester,
706 * and the domain mechanism will always permit the request. But if the user
707 * just asked for all images (clicking "Load images"), use the page URL as
708 * the requester so that the domain mechanism can act as a filter.
709 * If the possible patterns become more complex, it might be good to have
710 * the caller supply the requester instead.
711 */
712 const DilloUrl *requester = pattern ? NULL : this->page_url;
713
714 for (int i = 0; i < images->size(); i++) {
715 DilloHtmlImage *hi = images->get(i);
716
717 if (hi->image) {
718 assert(hi->url);
719 if ((!pattern) || (!a_Url_cmp(hi->url, pattern))) {
720 if (Html_load_image(bw, hi->url, requester, hi->image)) {
721 a_Image_unref (hi->image);
722 hi->image = NULL; // web owns it now
723 }
724 }
725 }
726 }
727}
728
733{
734 int nu = cssUrls->size();
735 cssUrls->increase();
736 cssUrls->set(nu, a_Url_dup(url));
737}
738
739bool DilloHtml::HtmlLinkReceiver::enter (Widget *widget, int link, int img,
740 int x, int y)
741{
743
744 _MSG(" ** ");
745 if (link == -1) {
746 _MSG(" Link LEAVE notify...\n");
747 a_UIcmd_set_msg(bw, "");
748 } else {
749 _MSG(" Link ENTER notify...\n");
750 Html_set_link_coordinates(html, link, x, y);
751 a_UIcmd_set_msg(bw, "%s", URL_STR(html->links->get(link)));
752 }
753 return true;
754}
755
759bool DilloHtml::HtmlLinkReceiver::press (Widget *widget, int link, int img,
760 int x, int y, EventButton *event)
761{
762 BrowserWindow *bw = html->bw;
763 int ret = false;
764 DilloUrl *linkurl = NULL;
765
766 _MSG("pressed button %d\n", event->button);
767 if (event->button == 3) {
768 // popup menus
769 if (img != -1) {
770 // image menu
771 if (link != -1)
772 linkurl = html->links->get(link);
773 const bool_t loaded_img = (html->images->get(img)->image == NULL);
774 a_UIcmd_image_popup(bw, html->images->get(img)->url, loaded_img,
775 html->page_url, linkurl);
776 ret = true;
777 } else {
778 if (link == -1) {
779 a_UIcmd_page_popup(bw, bw->num_page_bugs != 0, html->cssUrls);
780 ret = true;
781 } else {
782 a_UIcmd_link_popup(bw, html->links->get(link));
783 ret = true;
784 }
785 }
786 }
787 return ret;
788}
789
793bool DilloHtml::HtmlLinkReceiver::click (Widget *widget, int link, int img,
794 int x, int y, EventButton *event)
795{
796 BrowserWindow *bw = html->bw;
797
798 if ((img != -1) && (html->images->get(img)->image)) {
799 // clicked an image that has not already been loaded
800 if (event->button == 1){
801 // load all instances of this image
802 DilloUrl *pattern = html->images->get(img)->url;
803 html->loadImages(pattern);
804 return true;
805 }
806 }
807
808 if (link != -1) {
809 DilloUrl *url = html->links->get(link);
810 _MSG("clicked on URL %d: %s\n", link, a_Url_str (url));
811
812 Html_set_link_coordinates(html, link, x, y);
813
814 if (event->button == 1) {
815 a_UIcmd_open_url(bw, url);
816 } else if (event->button == 2) {
818 int focus = prefs.focus_new_tab ? 1 : 0;
819 if (event->state == SHIFT_MASK) focus = !focus;
820 a_UIcmd_open_url_nt(bw, url, focus);
821 } else
823 } else {
824 return false;
825 }
826
827 /* Change the link color to "visited" as visual feedback */
828 for (Widget *w = widget; w; w = w->getParent()) {
829 _MSG(" ->%s\n", w->getClassName());
830 if (w->instanceOf(dw::Textblock::CLASS_ID)) {
831 ((Textblock*)w)->changeLinkColor (link, html->visited_color);
832 break;
833 }
834 }
835 }
836 return true;
837}
838
843{
844 S_TOP(html)->parse_mode = DILLO_HTML_PARSE_MODE_STASH;
845 html->StashSpace = false;
846 dStr_truncate(html->Stash, 0);
847}
848
/**
 * Map the codes 145..151 to plain substitutes: 145/146 to an apostrophe,
 * 147/148 to a double quote, 149 to code 176 and 150/151 to a hyphen.
 * Any other value is returned unchanged.
 */
static int Html_ms_stupid_quotes_2ucs(int codepoint)
{
   if (codepoint == 145 || codepoint == 146)
      return '\'';
   if (codepoint == 147 || codepoint == 148)
      return '"';
   if (codepoint == 149)
      return 176;
   if (codepoint == 150 || codepoint == 151)
      return '-';

   return codepoint;
}
870
875static const char *Html_parse_numeric_charref(DilloHtml *html, char *tok,
876 bool_t is_attr, int *entsize)
877{
878 static char buf[5];
879 char *s = tok;
880 int n, codepoint = -1;
881
882 errno = 0;
883
884 if (*s == 'x' || *s == 'X') {
885 if (isxdigit(*++s)) {
886 /* strtol with base 16 accepts leading "0x" - we don't */
887 if (*s == '0' && s[1] == 'x') {
888 s++;
889 codepoint = 0;
890 } else {
891 codepoint = strtol(s, &s, 16);
892 }
893 }
894 } else if (isdigit(*s)) {
895 codepoint = strtol(s, &s, 10);
896 }
897 if (errno)
898 codepoint = -1;
899
900 if (*s == ';')
901 s++;
902 else {
903 if (prefs.show_extra_warnings && (html->DocType == DT_XHTML ||
904 (html->DocType == DT_HTML && html->DocTypeVersion <= 4.01f))) {
905 char c = *s;
906 *s = '\0';
907 BUG_MSG("Character reference '&#%s' lacks ';'.", tok);
908 *s = c;
909 }
910 /* Don't require ';' for old HTML, except that our current heuristic
911 * is to require it in attributes to avoid cases like "&copy=1" found
912 * in URLs.
913 */
914 if (is_attr || html->DocType == DT_XHTML ||
915 (html->DocType == DT_HTML && html->DocTypeVersion >= 5.0f)) {
916 return NULL;
917 }
918
919 }
920 if ((codepoint < 0x20 && codepoint != '\t' && codepoint != '\n' &&
921 codepoint != '\f') ||
922 (codepoint >= 0x7f && codepoint <= 0x9f) ||
923 (codepoint >= 0xd800 && codepoint <= 0xdfff) || codepoint > 0x10ffff ||
924 ((codepoint & 0xfffe) == 0xfffe) ||
925 (!(html->DocType == DT_HTML && html->DocTypeVersion >= 5.0f) &&
926 codepoint > 0xffff)) {
927 /* this catches null bytes, errors, codes out of range, disallowed
928 * control chars, permanently undefined chars, and surrogates.
929 */
930 char c = *s;
931 *s = '\0';
932 BUG_MSG("Numeric character reference '&#%s' is not valid.", tok);
933 *s = c;
934
935 codepoint = (codepoint >= 145 && codepoint <= 151) ?
936 Html_ms_stupid_quotes_2ucs(codepoint) : -1;
937 }
938 if (codepoint != -1) {
939 if (codepoint >= 128) {
940 n = a_Utf8_encode(codepoint, buf);
941 } else {
942 n = 1;
943 buf[0] = (char) codepoint;
944 }
945 assert(n < 5);
946 buf[n] = '\0';
947 *entsize = s-tok+2;
948 return buf;
949 } else {
950 return NULL;
951 }
952}
953
957static int Html_charref_comp(const void *a, const void *b)
958{
959 return strcmp(((Charref_t *)a)->ref, ((Charref_t *)b)->ref);
960}
961
966{
967 Charref_t RefKey;
968
969 RefKey.ref = key;
970 return (Charref_t*) bsearch(&RefKey, Charrefs, NumRef,
972}
973
978static const char *Html_parse_named_charref(DilloHtml *html, char *tok,
979 bool_t is_attr, int *entsize)
980{
981 Charref_t *p;
982 char c;
983 char *s = tok;
984 const char *ret = NULL;
985
986 while (*++s && (isalnum(*s) || strchr(":_.-", *s))) ;
987 c = *s;
988 *s = '\0';
989 if (c != ';') {
990 if (prefs.show_extra_warnings && (html->DocType == DT_XHTML ||
991 (html->DocType == DT_HTML && html->DocTypeVersion <= 4.01f)))
992 BUG_MSG("Character reference '&%s' lacks ';'.", tok);
993
994 /* Don't require ';' for old HTML, except that our current heuristic
995 * is to require it in attributes to avoid cases like "&copy=1" found
996 * in URLs.
997 */
998 if (is_attr || html->DocType == DT_XHTML ||
999 (html->DocType == DT_HTML && html->DocTypeVersion >= 5.0f)) {
1000 return ret;
1001 }
1002 }
1003
1004 if ((p = Html_charref_search(tok))) {
1005 ret = (html->DocType == DT_HTML && html->DocTypeVersion >= 5.0f) ?
1006 p->html5_str : p->html4_str;
1007 }
1008
1009 if (!ret && html->DocType == DT_XHTML && !strcmp(tok, "apos"))
1010 ret = "'";
1011
1012 *s = c;
1013 if (c == ';')
1014 s++;
1015
1016 if (!ret) {
1017 c = *s;
1018 *s = '\0';
1019 BUG_MSG("Undefined character reference '&%s'.", tok);
1020 *s = c;
1021 }
1022 *entsize = s-tok+1;
1023 return ret;
1024}
1025
1034static const char *Html_parse_entity(DilloHtml *html, const char *token,
1035 int toksize, int *entsize, bool_t is_attr)
1036{
1037 const char *ret = NULL;
1038 char *tok;
1039
1040 if (toksize > 50) {
1041 /* In pathological cases, attributes can be megabytes long and filled
1042 * with character references. As of HTML5, the longest defined character
1043 * reference is about 32 bytes long.
1044 */
1045 toksize = 50;
1046 }
1047
1048 token++;
1049 tok = dStrndup(token, (uint_t)toksize);
1050
1051 if (*tok == '#') {
1052 ret = Html_parse_numeric_charref(html, tok+1, is_attr, entsize);
1053 } else if (isalpha(*tok)) {
1054 ret = Html_parse_named_charref(html, tok, is_attr, entsize);
1055 } else if (prefs.show_extra_warnings &&
1056 (!(html->DocType == DT_HTML && html->DocTypeVersion >= 5.0f))) {
1057 // HTML5 doesn't mind literal '&'s.
1058 BUG_MSG("Literal '&'.");
1059 }
1060 dFree(tok);
1061
1062 return ret;
1063}
1064
1069char *a_Html_parse_entities(DilloHtml *html, const char *token, int toksize)
1070{
1071 const char *esc_set = "&";
1072 int i, s, entsize;
1073 char *str;
1074
1075 s = strcspn(token, esc_set);
1076 if (s >= toksize) {
1077 /* no ampersands */
1078 str = dStrndup(token, toksize);
1079 } else {
1080 Dstr *ds = dStr_sized_new(toksize);
1081
1082 dStr_append_l(ds, token, s);
1083
1084 for (i = s; i < toksize; i++) {
1085 const char *entstr;
1086 const bool_t is_attr = FALSE;
1087
1088 if (token[i] == '&' &&
1089 (entstr = Html_parse_entity(html, token+i, toksize-i, &entsize,
1090 is_attr))) {
1091 dStr_append(ds, entstr);
1092 i += entsize-1;
1093 } else {
1094 dStr_append_c(ds, token[i]);
1095 }
1096 }
1097 str = ds->str;
1098 dStr_free(ds, 0);
1099 }
1100 return str;
1101}
1102
1107static void Html_process_space_pre_line(DilloHtml *html, const char *space,
1108 int spacesize)
1109{
1110 int i, breakCnt = 0;
1111
1112 for (i = 0; i < spacesize; i++) {
1113 /* Support for "\r", "\n" and "\r\n" line breaks */
1114 if (space[i] == '\r' || (space[i] == '\n' && !html->PrevWasCR)) {
1115 breakCnt++;
1116 html->PrevWasCR = (space[i] == '\r');
1117
1118 HT2TB(html)->addLinebreak (html->wordStyle ());
1119 }
1120 }
1121 if (breakCnt == 0) {
1122 HT2TB(html)->addSpace(html->wordStyle ());
1123 }
1124}
1125
1129static void Html_process_space(DilloHtml *html, const char *space,
1130 int spacesize)
1131{
1132 char *spc;
1133 int i, offset;
1134 DilloHtmlParseMode parse_mode = S_TOP(html)->parse_mode;
1135
1136 if (S_TOP(html)->display_none) {
1137 /* do nothing */
1138 } else if (parse_mode == DILLO_HTML_PARSE_MODE_STASH) {
1139 html->StashSpace = (html->Stash->len > 0);
1140
1141 } else if (parse_mode == DILLO_HTML_PARSE_MODE_VERBATIM) {
1142 dStr_append_l(html->Stash, space, spacesize);
1143
1144 } else if (parse_mode == DILLO_HTML_PARSE_MODE_PRE) {
1145 int spaceCnt = 0;
1146
1147 /* re-scan the string for characters that cause line breaks */
1148 for (i = 0; i < spacesize; i++) {
1149 /* Support for "\r", "\n" and "\r\n" line breaks (skips the first) */
1150 if (!html->PreFirstChar &&
1151 (space[i] == '\r' || (space[i] == '\n' && !html->PrevWasCR))) {
1152
1153 if (spaceCnt) {
1154 spc = dStrnfill(spaceCnt, ' ');
1155 HT2TB(html)->addText (spc, spaceCnt, html->wordStyle ());
1156 dFree(spc);
1157 spaceCnt = 0;
1158 }
1159 HT2TB(html)->addLinebreak (html->wordStyle ());
1160 html->pre_column = 0;
1161 }
1162 html->PreFirstChar = false;
1163
1164 /* cr and lf should not be rendered -- they appear as a break */
1165 switch (space[i]) {
1166 case '\r':
1167 case '\n':
1168 break;
1169 case '\t':
1171 BUG_MSG("TAB character inside <pre>.");
1172 offset = TAB_SIZE - html->pre_column % TAB_SIZE;
1173 spaceCnt += offset;
1174 html->pre_column += offset;
1175 break;
1176 default:
1177 spaceCnt++;
1178 html->pre_column++;
1179 break;
1180 }
1181
1182 html->PrevWasCR = (space[i] == '\r');
1183 }
1184
1185 if (spaceCnt) {
1186 // add break possibility for the white-space:pre-wrap case
1187 HT2TB(html)->addBreakOption (html->wordStyle (), false);
1188 spc = dStrnfill(spaceCnt, ' ');
1189 HT2TB(html)->addText (spc, spaceCnt, html->wordStyle ());
1190 dFree(spc);
1191 }
1192
1193 } else {
1194 if (SGML_SPCDEL) {
1195 /* SGML_SPCDEL ignores white space immediately after an open tag */
1196 } else if (html->wordStyle ()->whiteSpace == WHITE_SPACE_PRE_LINE) {
1197 Html_process_space_pre_line(html, space, spacesize);
1198 } else {
1199 HT2TB(html)->addSpace(html->wordStyle ());
1200 }
1201
1202 if (parse_mode == DILLO_HTML_PARSE_MODE_STASH_AND_BODY)
1203 html->StashSpace = (html->Stash->len > 0);
1204 }
1205}
1206
1215static void Html_process_word(DilloHtml *html, const char *word, int size)
1216{
1217 int i, j, start;
1218 char *Pword;
1219 DilloHtmlParseMode parse_mode = S_TOP(html)->parse_mode;
1220
1221 if (S_TOP(html)->display_none)
1222 return;
1223 if ((i = html->PrevWasHtmlClose ? 1 : html->PrevWasBodyClose ? 2 : 0)) {
1224 BUG_MSG("Content after </%s> tag.", i == 1 ? "html" : "body");
1225 html->PrevWasHtmlClose = html->PrevWasBodyClose = false;
1226 }
1227
1228 if (parse_mode == DILLO_HTML_PARSE_MODE_STASH ||
1230 if (html->StashSpace) {
1231 dStr_append_c(html->Stash, ' ');
1232 html->StashSpace = false;
1233 }
1234 Pword = a_Html_parse_entities(html, word, size);
1235 dStr_append(html->Stash, Pword);
1236 dFree(Pword);
1237
1238 } else if (parse_mode == DILLO_HTML_PARSE_MODE_VERBATIM) {
1239 /* word goes in untouched, it is not processed here. */
1240 dStr_append_l(html->Stash, word, size);
1241 }
1242
1243 if (parse_mode == DILLO_HTML_PARSE_MODE_STASH ||
1244 parse_mode == DILLO_HTML_PARSE_MODE_VERBATIM) {
1245 /* skip until the closing instructions */
1246
1247 } else if (parse_mode == DILLO_HTML_PARSE_MODE_PRE) {
1248 /* all this overhead is to catch white-space entities */
1249 Pword = a_Html_parse_entities(html, word, size);
1250 for (start = i = 0; Pword[i]; start = i)
1251 if (isspace(Pword[i])) {
1252 while (Pword[++i] && isspace(Pword[i])) ;
1253 Html_process_space(html, Pword + start, i - start);
1254 } else {
1255 while (Pword[++i] && !isspace(Pword[i])) ;
1256 HT2TB(html)->addText(Pword + start, i - start, html->wordStyle ());
1257 html->pre_column += i - start;
1258 html->PreFirstChar = false;
1259 }
1260 dFree(Pword);
1261
1262 } else {
1263 const char *word2, *beyond_word2;
1264
1265 Pword = NULL;
1266 if (!memchr(word,'&', size)) {
1267 /* No entities */
1268 word2 = word;
1269 beyond_word2 = word + size;
1270 } else {
1271 /* Collapse white-space entities inside the word (except &nbsp;) */
1272 Pword = a_Html_parse_entities(html, word, size);
1273 /* Collapse adjacent " \t\f\n\r" characters into a single space */
1274 for (i = j = 0; (Pword[i] = Pword[j]); ++i, ++j) {
1275 if (strchr(" \t\f\n\r", Pword[i])) {
1276 if (i == 0 || (i > 0 && Pword[i-1] != ' '))
1277 Pword[i] = ' ';
1278 else
1279 for (--i; Pword[j+1] && strchr(" \t\f\n\r", Pword[j+1]); ++j)
1280 ;
1281 }
1282 }
1283 word2 = Pword;
1284 beyond_word2 = word2 + strlen(word2);
1285 }
1286 for (start = i = 0; word2[i]; start = i) {
1287 int len;
1288
1289 if (isspace(word2[i])) {
1290 while (word2[++i] && isspace(word2[i])) ;
1291 Html_process_space(html, word2 + start, i - start);
1292 } else if (!strncmp(word2+i, utf8_zero_width_space, 3)) {
1293 i += 3;
1294 HT2TB(html)->addBreakOption(html->wordStyle (), false);
1295 } else if (a_Utf8_ideographic(word2+i, beyond_word2, &len)) {
1296 i += len;
1297 HT2TB(html)->addText(word2 + start, i - start, html->wordStyle ());
1298 HT2TB(html)->addBreakOption(html->wordStyle (), false);
1299 } else {
1300 do {
1301 i += len;
1302 } while (word2[i] && !isspace(word2[i]) &&
1303 strncmp(word2+i, utf8_zero_width_space, 3) &&
1304 (!a_Utf8_ideographic(word2+i, beyond_word2, &len)));
1305 HT2TB(html)->addText(word2 + start, i - start, html->wordStyle ());
1306 }
1307 }
1308 if (Pword == word2)
1309 dFree(Pword);
1310 }
1311}
1312
1317static bool Html_match_tag(const char *tagstr, char *tag, int tagsize)
1318{
1319 int i;
1320
1321 for (i = 0; i < tagsize && tagstr[i] != '\0'; i++) {
1322 if (D_ASCII_TOLOWER(tagstr[i]) != D_ASCII_TOLOWER(tag[i]))
1323 return false;
1324 }
1325 /* The test for '/' is for xml compatibility: "empty/>" will be matched. */
1326 if (i < tagsize && (isspace(tag[i]) || tag[i] == '>' || tag[i] == '/'))
1327 return true;
1328 return false;
1329}
1330
1335static void Html_eventually_pop_dw(DilloHtml *html, bool hand_over_break)
1336{
1337 if (html->dw != S_TOP(html)->textblock) {
1338 if (hand_over_break)
1339 HT2TB(html)->handOverBreak (html->style ());
1340 HT2TB(html)->flush ();
1341 html->dw = S_TOP(html)->textblock;
1342 }
1343}
1344
1348static void Html_push_tag(DilloHtml *html, int tag_idx)
1349{
1350 int n_items;
1351
1352 n_items = html->stack->size ();
1353 html->stack->increase ();
1354 /* We'll copy the former stack item and just change the tag and its index
1355 * instead of copying all fields except for tag. --Jcid */
1356 *html->stack->getRef(n_items) = *html->stack->getRef(n_items - 1);
1357 html->stack->getRef(n_items)->tag_idx = tag_idx;
1358 html->dw = S_TOP(html)->textblock;
1359}
1360
1365static void Html_force_push_tag(DilloHtml *html, int tag_idx)
1366{
1367 html->startElement (tag_idx);
1368 Html_push_tag(html, tag_idx);
1369}
1370
1375{
1376 bool hand_over_break;
1377
1378 html->styleEngine->endElement (S_TOP(html)->tag_idx);
1379 hand_over_break = S_TOP(html)->hand_over_break;
1380 html->stack->setSize (html->stack->size() - 1);
1381 Html_eventually_pop_dw(html, hand_over_break);
1382}
1383
1384
1385
1386/*
1387 * Some parsing routines.
1388 */
1389
1390/*
1391 * Used by a_Html_parse_length
1392 */
1394 char **endptr)
1395{
1396 CssLength l;
1397 double v;
1398 char *end;
1399
1400 v = strtod (attr, &end);
1401 switch (*end) {
1402 case '%':
1403 end++;
1405 break;
1406
1407 case '*':
1408 end++;
1410 break;
1411/*
1412 The "px" suffix seems not allowed by HTML4.01 SPEC.
1413 case 'p':
1414 if (end[1] == 'x')
1415 end += 2;
1416*/
1417 default:
1419 break;
1420 }
1421
1422 if (endptr)
1423 *endptr = end;
1424 return l;
1425}
1426
1427
1432CssLength a_Html_parse_length (DilloHtml *html, const char *attr)
1433{
1434 CssLength l;
1435 char *end;
1436
1437 l = Html_parse_length_or_multi_length (attr, &end);
1439 /* not allowed as &Length; */
1441 else {
1442 /* allow only whitespaces */
1443 if (*end && !isspace (*end)) {
1444 BUG_MSG("Garbage after length: '%s'.", attr);
1446 }
1447 }
1448
1449 _MSG("a_Html_parse_length: \"%s\" %d\n", attr, CSS_LENGTH_VALUE(l));
1450 return l;
1451}
1452
1457int32_t a_Html_color_parse(DilloHtml *html, const char *str,
1458 int32_t default_color)
1459{
1460 int err = 1;
1461 int32_t color = a_Color_parse(str, default_color, &err);
1462
1463 if (err) {
1464 BUG_MSG("Color \"%s\" is not in \"#RRGGBB\" format.", str);
1465 }
1466 return color;
1467}
1468
1474static int
1475 Html_check_name_val(DilloHtml *html, const char *val, const char *attrname)
1476{
1477 if (html->DocType == DT_HTML && html->DocTypeVersion >= 5.0f) {
1478 bool valid = *val && !strchr(val, ' ');
1479
1480 if (!valid) {
1481 BUG_MSG("'%s' value \"%s\" must not be empty and must not contain "
1482 "spaces.", attrname, val);
1483 }
1484 return valid ? 1 : 0;
1485 } else {
1486 int i;
1487
1488 for (i = 0; val[i]; ++i)
1489 if (!isascii(val[i]) || !(isalnum(val[i]) || strchr(":_.-", val[i])))
1490 break;
1491
1492 if (val[i] || !(isascii(val[0]) && isalpha(val[0])))
1493 BUG_MSG("%s attribute value \"%s\" is not of the form "
1494 "'[A-Za-z][A-Za-z0-9:_.-]*'.", attrname, val);
1495
1496 return !(val[i]);
1497 }
1498}
1499
1519static void Html_parse_doctype(DilloHtml *html, const char *tag, int tagsize)
1520{
1521 static const char HTML_SGML_sig [] = "<!DOCTYPE HTML PUBLIC ";
1522 static const char HTML20 [] = "-//IETF//DTD HTML";
1523 static const char HTML32 [] = "-//W3C//DTD HTML 3.2";
1524 static const char HTML40 [] = "-//W3C//DTD HTML 4.0";
1525 static const char HTML401 [] = "-//W3C//DTD HTML 4.01";
1526 static const char HTML401_url[] = "http://www.w3.org/TR/html4/";
1527 static const char XHTML1 [] = "-//W3C//DTD XHTML 1.0";
1528 static const char XHTML1_url [] = "http://www.w3.org/TR/xhtml1/DTD/";
1529 static const char XHTML11 [] = "-//W3C//DTD XHTML 1.1";
1530 static const char XHTML11_url[] = "http://www.w3.org/TR/xhtml11/DTD/";
1531
1532 size_t i;
1533 int quote;
1534 char *p, *ntag = dStrndup(tag, tagsize);
1535
1536 /* Tag sanitization: Collapse whitespace between tokens
1537 * and replace '\n' and '\r' with ' ' inside quoted strings. */
1538 for (i = 0, p = ntag; *p; ++p) {
1539 if (isspace(*p)) {
1540 for (ntag[i++] = ' '; isspace(p[1]); ++p) ;
1541 } else if ((quote = *p) == '"' || *p == '\'') {
1542 for (ntag[i++] = *p++; (ntag[i] = *p) && ntag[i++] != quote; ++p) {
1543 if (*p == '\n' || *p == '\r')
1544 ntag[i - 1] = ' ';
1545 p += (p[0] == '\r' && p[1] == '\n') ? 1 : 0;
1546 }
1547 } else {
1548 ntag[i++] = *p;
1549 }
1550 if (!*p)
1551 break;
1552 }
1553 ntag[i] = 0;
1554
1555 _MSG("New: {%s}\n", ntag);
1556
1557 if (html->DocType != DT_NONE)
1558 BUG_MSG("Multiple DOCTYPE declarations.");
1559
1560 /* The default DT_NONE type is TagSoup */
1561 if (i > strlen(HTML_SGML_sig) && // avoid out of bounds reads!
1562 !dStrnAsciiCasecmp(ntag, HTML_SGML_sig, strlen(HTML_SGML_sig))) {
1563 p = ntag + strlen(HTML_SGML_sig) + 1;
1564 if (!strncmp(p, HTML401, strlen(HTML401)) &&
1565 dStriAsciiStr(p + strlen(HTML401), HTML401_url)) {
1566 html->DocType = DT_HTML;
1567 html->DocTypeVersion = 4.01f;
1568 } else if (!strncmp(p, XHTML1, strlen(XHTML1)) &&
1569 dStriAsciiStr(p + strlen(XHTML1), XHTML1_url)) {
1570 html->DocType = DT_XHTML;
1571 html->DocTypeVersion = 1.0f;
1572 } else if (!strncmp(p, XHTML11, strlen(XHTML11)) &&
1573 dStriAsciiStr(p + strlen(XHTML11), XHTML11_url)) {
1574 html->DocType = DT_XHTML;
1575 html->DocTypeVersion = 1.1f;
1576 } else if (!strncmp(p, HTML40, strlen(HTML40))) {
1577 html->DocType = DT_HTML;
1578 html->DocTypeVersion = 4.0f;
1579 } else if (!strncmp(p, HTML32, strlen(HTML32))) {
1580 html->DocType = DT_HTML;
1581 html->DocTypeVersion = 3.2f;
1582 } else if (!strncmp(p, HTML20, strlen(HTML20))) {
1583 html->DocType = DT_HTML;
1584 html->DocTypeVersion = 2.0f;
1585 }
1586 } else if (!dStrAsciiCasecmp(ntag, "<!DOCTYPE html>") ||
1587 !dStrAsciiCasecmp(ntag, "<!DOCTYPE html >") ||
1588 !dStrAsciiCasecmp(ntag,
1589 "<!DOCTYPE html SYSTEM \"about:legacy-compat\">") ||
1590 !dStrAsciiCasecmp(ntag,
1591 "<!DOCTYPE html SYSTEM 'about:legacy-compat'>")) {
1592 html->DocType = DT_HTML;
1593 html->DocTypeVersion = 5.0f;
1594 }
1595 if (html->DocType == DT_NONE) {
1596 html->DocType = DT_UNRECOGNIZED;
1597 BUG_MSG("DOCTYPE not recognized: ('%s').", ntag);
1598 }
1599 dFree(ntag);
1600}
1601
1605static void Html_tag_open_html(DilloHtml *html, const char *tag, int tagsize)
1606{
1607 /* The IN_HTML flag will be kept set until at IN_EOF condition.
1608 * This allows to handle pages with multiple or uneven HTML tags */
1609
1610 if (!(html->InFlags & IN_HTML))
1611 html->InFlags |= IN_HTML;
1612 if (html->Num_HTML < UCHAR_MAX)
1613 ++html->Num_HTML;
1614
1615 if (html->Num_HTML > 1) {
1616 BUG_MSG("<html> was already open.");
1617 html->ReqTagClose = true;
1618 }
1619}
1620
1625{
1626 _MSG("Html_tag_close_html: Num_HTML=%d\n", html->Num_HTML);
1627
1628 /* As some Tag soup pages use multiple HTML tags, this function
1629 * gets called only on EOF and upon an extra HTML open.
1630 * Also, we defer clearing the IN_HTML flag until IN_EOF */
1631}
1632
1636static void Html_tag_open_head(DilloHtml *html, const char *tag, int tagsize)
1637{
1638 if (html->InFlags & IN_BODY) {
1639 BUG_MSG("<head> must go before the BODY section.");
1640 html->ReqTagClose = true;
1641 return;
1642 }
1643
1644 if (html->Num_HEAD < UCHAR_MAX)
1645 ++html->Num_HEAD;
1646 if (html->InFlags & IN_HEAD) {
1647 BUG_MSG("<head> was already open.");
1648 html->ReqTagClose = true;
1649 } else if (html->Num_HEAD > 1) {
1650 BUG_MSG("<head> already finished -- ignoring.");
1651 html->ReqTagClose = true;
1652 } else {
1653 html->InFlags |= IN_HEAD;
1654 }
1655}
1656
1662{
1663 if (html->InFlags & IN_HEAD) {
1664 if (html->Num_HEAD == 1) {
1665 /* match for the well formed start of HEAD section */
1666 if (html->Num_TITLE == 0)
1667 BUG_MSG("<head> lacks <title>.");
1668
1669 html->InFlags &= ~IN_HEAD;
1670
1671 /* charset is already set, load remote stylesheets now */
1672 for (int i = 0; i < html->cssUrls->size(); i++) {
1673 a_Html_load_stylesheet(html, html->cssUrls->get(i));
1674 }
1675 } else if (html->Num_HEAD > 1) {
1676 --html->Num_HEAD;
1677 }
1678 } else {
1679 /* not reached, see Html_tag_cleanup_at_close() */
1680 }
1681}
1682
1687static void Html_tag_open_title(DilloHtml *html, const char *tag, int tagsize)
1688{
1689 /* fill the stash buffer so TITLE content can be ignored
1690 * when not valid, redundant or outside HEAD section */
1691 a_Html_stash_init(html);
1692
1693 if (html->InFlags & IN_HEAD) {
1694 if (html->Num_TITLE < UCHAR_MAX)
1695 ++html->Num_TITLE;
1696 if (html->Num_TITLE > 1)
1697 BUG_MSG("Redundant <title>.");
1698 } else {
1699 BUG_MSG("<title> must be inside <head> -- ignoring.");
1700 }
1701}
1702
1708{
1709 /* title is only valid inside HEAD */
1710 if (html->InFlags & IN_HEAD && html->Num_TITLE == 1) {
1711 /* Ignore empty titles: <title></title> */
1712 char *title = html->Stash->str;
1713 if (!title || title[0] == '\0')
1714 return;
1715 a_UIcmd_set_page_title(html->bw, title);
1717 }
1718}
1719
1725static void Html_tag_open_script(DilloHtml *html, const char *tag, int tagsize)
1726{
1727 a_Html_stash_init(html);
1728 S_TOP(html)->parse_mode = DILLO_HTML_PARSE_MODE_VERBATIM;
1729}
1730
1735{
1736 /* eventually the stash will be sent to an interpreter for parsing */
1737}
1738
1743static void Html_tag_open_style(DilloHtml *html, const char *tag, int tagsize)
1744{
1745 const char *attrbuf;
1746
1747 html->loadCssFromStash = true;
1748
1749 if (!(attrbuf = a_Html_get_attr(html, tag, tagsize, "type"))) {
1750 if (html->DocType != DT_HTML || html->DocTypeVersion <= 4.01f)
1751 BUG_MSG("<style> requires type attribute.");
1752 } else if (dStrAsciiCasecmp(attrbuf, "text/css")) {
1753 html->loadCssFromStash = false;
1754 }
1755 if ((attrbuf = a_Html_get_attr(html, tag, tagsize, "media")) &&
1756 dStrAsciiCasecmp(attrbuf, "all") && !dStriAsciiStr(attrbuf, "screen")) {
1757 /* HTML 4.01 sec. 6.13 says that media descriptors are case-sensitive,
1758 * but sec. 14.2.3 says that the attribute is case-insensitive.
1759 * TODO can be a comma-separated list.
1760 * TODO handheld.
1761 */
1762 html->loadCssFromStash = false;
1763 }
1764
1765 a_Html_stash_init(html);
1766 S_TOP(html)->parse_mode = DILLO_HTML_PARSE_MODE_VERBATIM;
1767}
1768
1773{
1775 html->styleEngine->parse(html, html->base_url, html->Stash->str,
1776 html->Stash->len, CSS_ORIGIN_AUTHOR);
1777}
1778
1779/*
1780 * <BODY>
1781 */
1782static void Html_tag_open_body(DilloHtml *html, const char *tag, int tagsize)
1783{
1784 const char *attrbuf;
1785 int32_t color;
1786 style::Color *bgColor;
1787 style::StyleImage *bgImage;
1788 style::BackgroundRepeat bgRepeat;
1789 style::BackgroundAttachment bgAttachment;
1790 style::Length bgPositionX, bgPositionY;
1791
1792 _MSG("Html_tag_open_body Num_BODY=%d\n", html->Num_BODY);
1793 if (!(html->InFlags & IN_BODY))
1794 html->InFlags |= IN_BODY;
1795 if (html->Num_BODY < UCHAR_MAX)
1796 ++html->Num_BODY;
1797
1798 if (html->Num_BODY > 1) {
1799 BUG_MSG("<body> was already open.");
1800 html->ReqTagClose = true;
1801 return;
1802 }
1803
1804 if (html->InFlags & IN_HEAD) {
1805 /* if we're here, it's bad XHTML, no need to recover */
1806 BUG_MSG("Unclosed <head>.");
1807 }
1808
1809 if ((attrbuf = a_Html_get_attr(html, tag, tagsize, "bgcolor"))) {
1810 color = a_Html_color_parse(html, attrbuf, -1);
1811
1812 if (html->DocType == DT_HTML && html->DocTypeVersion >= 5.0f)
1813 BUG_MSG("<body> bgcolor attribute is obsolete.");
1814
1815 if (color != -1)
1817 CSS_TYPE_COLOR, color);
1818 }
1819
1820 if ((attrbuf = a_Html_get_attr(html, tag, tagsize, "text"))) {
1821 color = a_Html_color_parse(html, attrbuf, -1);
1822
1823 if (html->DocType == DT_HTML && html->DocTypeVersion >= 5.0f)
1824 BUG_MSG("<body> text attribute is obsolete.");
1825
1826 if (color != -1)
1828 CSS_TYPE_COLOR, color);
1829 }
1830
1831 html->restyle ();
1832
1833 if ((attrbuf = a_Html_get_attr(html, tag, tagsize, "link"))) {
1834 html->non_css_link_color = a_Html_color_parse(html, attrbuf, -1);
1835 if (html->DocType == DT_HTML && html->DocTypeVersion >= 5.0f)
1836 BUG_MSG("<body> link attribute is obsolete.");
1837 }
1838
1839 if ((attrbuf = a_Html_get_attr(html, tag, tagsize, "vlink"))) {
1840 html->non_css_visited_color = a_Html_color_parse(html, attrbuf, -1);
1841 if (html->DocType == DT_HTML && html->DocTypeVersion >= 5.0f)
1842 BUG_MSG("<body> vlink attribute is obsolete.");
1843 }
1844
1845 html->dw->setStyle (html->style ());
1846
1847 bgColor = html->styleEngine->backgroundColor ();
1848 if (bgColor)
1849 HT2LT(html)->setBgColor(bgColor);
1850
1851 bgImage = html->styleEngine->backgroundImage (&bgRepeat, &bgAttachment,
1852 &bgPositionX, &bgPositionY);
1853 if (bgImage)
1854 HT2LT(html)->setBgImage(bgImage, bgRepeat, bgAttachment, bgPositionX,
1855 bgPositionY);
1856
1857 /* Determine a color for visited links.
1858 * This color is computed once per page and used for immediate feedback
1859 * when clicking a link.
1860 * On reload style including color for visited links is computed properly
1861 * according to CSS.
1862 */
1863 html->startElement (i_A);
1864 html->styleEngine->setPseudoVisited ();
1865 if (html->non_css_visited_color != -1) {
1867 html->non_css_visited_color);
1868 }
1869 html->visited_color = html->style ()->color->getColor ();
1870 html->styleEngine->endElement (i_A);
1871
1873 /* get a color that has a "safe distance" from text, link and bg */
1874 html->visited_color =
1876 html->style ()->color->getColor(),
1877 html->non_css_link_color,
1879 }
1880
1881
1882 S_TOP(html)->parse_mode = DILLO_HTML_PARSE_MODE_BODY;
1883}
1884
1885/*
1886 * BODY
1887 */
1889{
1890 _MSG("Html_tag_close_body: Num_BODY=%d\n", html->Num_BODY);
1891
1892 /* As some Tag soup pages use multiple BODY tags, this function
1893 * gets called only on EOF and upon an extra BODY open.
1894 * Also, we defer clearing the IN_BODY flag until IN_EOF */
1895}
1896
1897/*
1898 * <P>
1899 * TODO: what's the point between adding the parbreak before and
1900 * after the push?
1901 */
1902static void Html_tag_open_p(DilloHtml *html, const char *tag, int tagsize)
1903{
1904 CssPropertyList props;
1905
1906 a_Html_tag_set_align_attr (html, tag, tagsize);
1907}
1908
1909/*
1910 * <FRAME>, <IFRAME>
1911 * TODO: This is just a temporary fix while real frame support
1912 * isn't finished. Imitates lynx/w3m's frames.
1913 */
1914static void Html_tag_open_frame (DilloHtml *html, const char *tag, int tagsize)
1915{
1916 const char *attrbuf;
1917 DilloUrl *url;
1918 CssPropertyList props;
1919
1920 if (!(attrbuf = a_Html_get_attr(html, tag, tagsize, "src")))
1921 return;
1922
1923 if (!(url = a_Html_url_new(html, attrbuf, NULL, 0)))
1924 return;
1925
1927 /* visited frame */
1928 html->styleEngine->setPseudoVisited ();
1929 } else {
1930 /* unvisited frame */
1931 html->styleEngine->setPseudoLink ();
1932 }
1933
1935 Html_set_new_link(html,&url));
1936}
1937
1938static void
1939 Html_tag_content_frame (DilloHtml *html, const char *tag, int tagsize)
1940{
1941 const char *attrbuf;
1942 char *src;
1943 Textblock *textblock;
1944 Widget *bullet;
1945
1946 textblock = HT2TB(html);
1947
1948 if (!(attrbuf = a_Html_get_attr(html, tag, tagsize, "src")))
1949 return;
1950
1951 src = dStrdup(attrbuf);
1952
1953 textblock->addParbreak (5, html->wordStyle ());
1954
1955 bullet = new Bullet();
1956 textblock->addWidget(bullet, html->wordStyle ());
1957 textblock->addSpace(html->wordStyle ());
1958
1959 if (D_ASCII_TOLOWER(tag[1]) == 'i') {
1960 /* IFRAME usually comes with very long advertising/spying URLS,
1961 * to not break rendering we will force name="IFRAME" */
1962 textblock->addText ("IFRAME", html->wordStyle ());
1963
1964 } else {
1965 /* FRAME:
1966 * If 'name' tag is present use it, if not use 'src' value */
1967 if (!(attrbuf = a_Html_get_attr(html, tag, tagsize, "name"))) {
1968 textblock->addText (src, html->wordStyle ());
1969 } else {
1970 textblock->addText (attrbuf, html->wordStyle ());
1971 }
1972 }
1973
1974 textblock->addParbreak (5, html->wordStyle ());
1975
1976 dFree(src);
1977}
1978
1979/*
1980 * <FRAMESET>
1981 * TODO: This is just a temporary fix while real frame support
1982 * isn't finished. Imitates lynx/w3m's frames.
1983 */
1985 const char *tag, int tagsize)
1986{
1987 HT2TB(html)->addParbreak (9, html->wordStyle ());
1988 HT2TB(html)->addText("--FRAME--", html->wordStyle ());
1989 Html_add_textblock(html, true, 5, false);
1990}
1991
1992/*
1993 * <H1> | <H2> | <H3> | <H4> | <H5> | <H6>
1994 */
1995static void Html_tag_open_h(DilloHtml *html, const char *tag, int tagsize)
1996{
1997 a_Html_tag_set_align_attr (html, tag, tagsize);
1998
1999 a_Html_stash_init(html);
2000 S_TOP(html)->parse_mode =
2002}
2003
2004/*
2005 * <BR>
2006 */
2007static void Html_tag_content_br(DilloHtml *html, const char *tag, int tagsize)
2008{
2009 HT2TB(html)->addLinebreak (html->wordStyle ());
2010}
2011
2012/*
2013 * <FONT>
2014 */
2015static void Html_tag_open_font(DilloHtml *html, const char *tag, int tagsize)
2016{
2017 const char *attrbuf;
2018 char *fontFamily = NULL;
2019 int32_t color;
2020
2021 if ((attrbuf = a_Html_get_attr(html, tag, tagsize, "color"))) {
2023 color = html->visited_color;
2024 } else {
2025 /* use the tag-specified color */
2026 color = a_Html_color_parse(html, attrbuf, -1);
2027 }
2028 if (color != -1)
2030 CSS_TYPE_COLOR, color);
2031 }
2032
2033 if ((attrbuf = a_Html_get_attr(html, tag, tagsize, "face"))) {
2034 fontFamily = dStrdup(attrbuf);
2036 CSS_TYPE_SYMBOL, fontFamily);
2037 }
2038
2039 dFree(fontFamily);
2040}
2041
2042/*
2043 * <ABBR>
2044 */
2045static void Html_tag_open_abbr(DilloHtml *html, const char *tag, int tagsize)
2046{
2047 const char *attrbuf;
2048
2050
2051 if (prefs.show_tooltip &&
2052 (attrbuf = a_Html_get_attr(html, tag, tagsize, "title"))) {
2053
2055 attrbuf);
2056 }
2057}
2058
2062void a_Html_common_image_attrs(DilloHtml *html, const char *tag, int tagsize)
2063{
2064 char *width_ptr, *height_ptr;
2065 const char *attrbuf;
2068 int w = 0, h = 0;
2069
2070 if (prefs.show_tooltip &&
2071 (attrbuf = a_Html_get_attr(html, tag, tagsize, "title"))) {
2073 attrbuf);
2074 }
2075 width_ptr = a_Html_get_attr_wdef(html, tag, tagsize, "width", NULL);
2076 height_ptr = a_Html_get_attr_wdef(html, tag, tagsize, "height", NULL);
2077 // Check for malicious values
2078 // TODO: the same for percentage and relative lengths.
2079 if (width_ptr) {
2080 l_w = a_Html_parse_length (html, width_ptr);
2081 w = (int) (CSS_LENGTH_TYPE(l_w) == CSS_LENGTH_TYPE_PX ?
2082 CSS_LENGTH_VALUE(l_w) : 0);
2083 }
2084 if (height_ptr) {
2085 l_h = a_Html_parse_length (html, height_ptr);
2086 h = (int) (CSS_LENGTH_TYPE(l_h) == CSS_LENGTH_TYPE_PX ?
2087 CSS_LENGTH_VALUE(l_h) : 0);
2088 }
2089 /* Check for suspicious image size request that would cause
2090 * an excessive amount of memory to be allocated for the
2091 * image buffer.
2092 * Be careful to avoid integer overflows during the checks.
2093 * There is an additional check in dw/image.cc to catch cases
2094 * where only one dimension is given and the image is scaled
2095 * preserving its original aspect ratio.
2096 * Size requests passed via CSS are also checked there.
2097 */
2098 if (w < 0 || h < 0 ||
2099 w > IMAGE_MAX_AREA || h > IMAGE_MAX_AREA ||
2100 (h > 0 && w > IMAGE_MAX_AREA / h)) {
2101 dFree(width_ptr);
2102 dFree(height_ptr);
2103 width_ptr = height_ptr = NULL;
2104 MSG("a_Html_common_image_attrs: suspicious image size request %d x %d\n",
2105 w, h);
2106 } else {
2113 }
2114
2115 /* TODO: we should scale the image respecting its ratio.
2116 * As the image size is not known at this time, maybe a flag
2117 * can be set to scale it later.
2118 if ((width_ptr && !height_ptr) || (height_ptr && !width_ptr))
2119 [...]
2120 */
2121
2122 /* x_img is an index to a list of {url,image} pairs.
2123 * We know a_Html_image_new() will use size() as its next index */
2125 html->images->size());
2126
2127
2128 dFree(width_ptr);
2129 dFree(height_ptr);
2130}
2131
2132DilloImage *a_Html_image_new(DilloHtml *html, const char *tag, int tagsize)
2133{
2134 bool load_now;
2135 char *alt_ptr;
2136 const char *attrbuf;
2137 DilloUrl *url;
2139
2140 if (!(attrbuf = a_Html_get_attr(html, tag, tagsize, "src")) ||
2141 !(url = a_Html_url_new(html, attrbuf, NULL, 0)))
2142 return NULL;
2143
2144 alt_ptr = a_Html_get_attr_wdef(html, tag, tagsize, "alt", NULL);
2145 if (!alt_ptr || !*alt_ptr) {
2146 dFree(alt_ptr);
2147 alt_ptr = dStrdup("[IMG]");
2148 }
2149
2150 dw::Image *dw = new dw::Image(alt_ptr);
2151 image =
2152 a_Image_new(html->dw->getLayout(), (void*)(dw::core::ImgRenderer*)dw, 0);
2153
2155
2156 if (HT2TB(html)->getBgColor())
2157 image->bg_color = HT2TB(html)->getBgColor()->getColor();
2158
2160 hi->url = url;
2161 html->images->increase();
2162 html->images->set(html->images->size() - 1, hi);
2163
2164 load_now = prefs.load_images ||
2165 !dStrAsciiCasecmp(URL_SCHEME(url), "data") ||
2167
2168 if (load_now && Html_load_image(html->bw, url, html->page_url, image)) {
2169 // hi->image is NULL if dillo tries to load the image immediately
2170 hi->image = NULL;
2172 } else {
2173 // otherwise a reference is kept in html->images
2174 hi->image = image;
2175 }
2176
2177 dFree(alt_ptr);
2178 return image;
2179}
2180
2185 const DilloUrl *requester, DilloImage *Image)
2186{
2187 DilloWeb *Web;
2188 int ClientKey;
2189 /* Fill a Web structure for the cache query */
2190 Web = a_Web_new(bw, url, requester);
2191 Web->Image = Image;
2193 Web->flags |= WEB_Image;
2194 /* Request image data from the cache */
2195 if ((ClientKey = a_Capi_open_url(Web, NULL, NULL)) != 0) {
2196 a_Bw_add_client(bw, ClientKey, 0);
2197 a_Bw_add_url(bw, url);
2198 }
2199 return ClientKey != 0;
2200}
2201
2202static void Html_tag_open_img(DilloHtml *html, const char *tag, int tagsize)
2203{
2204 int space, border;
2205 const char *attrbuf;
2206
2207 a_Html_common_image_attrs(html, tag, tagsize);
2208
2209 /* Spacing to the left and right */
2210 if ((attrbuf = a_Html_get_attr(html, tag, tagsize, "hspace"))) {
2211 space = strtol(attrbuf, NULL, 10);
2212 if (space > 0) {
2213 space = CSS_CREATE_LENGTH(space, CSS_LENGTH_TYPE_PX);
2218 }
2219 }
2220
2221 /* Spacing at the top and bottom */
2222 if ((attrbuf = a_Html_get_attr(html, tag, tagsize, "vspace"))) {
2223 space = strtol(attrbuf, NULL, 10);
2224 if (space > 0) {
2225 space = CSS_CREATE_LENGTH(space, CSS_LENGTH_TYPE_PX);
2230 }
2231 }
2232
2233 /* Border */
2234 if ((attrbuf = a_Html_get_attr(html, tag, tagsize, "border"))) {
2235 border = strtol(attrbuf, NULL, 10);
2236 if (border >= 0) {
2237 border = CSS_CREATE_LENGTH(border, CSS_LENGTH_TYPE_PX);
2246
2255 }
2256 }
2257
2258}
2259
2265static void Html_tag_content_img(DilloHtml *html, const char *tag, int tagsize)
2266{
2268 DilloUrl *usemap_url;
2269 const char *attrbuf;
2270
2271 /* This avoids loading images. Useful for viewing suspicious HTML email. */
2272 if (URL_FLAGS(html->base_url) & URL_SpamSafe)
2273 return;
2274
2275 Image = a_Html_image_new(html, tag, tagsize);
2276 if (!Image)
2277 return;
2278
2279 usemap_url = NULL;
2280 if ((attrbuf = a_Html_get_attr(html, tag, tagsize, "usemap")))
2281 /* TODO: usemap URLs outside of the document are not used. */
2282 usemap_url = a_Html_url_new(html, attrbuf, NULL, 0);
2283
2284 // At this point, we know that Image->ir represents an image
2285 // widget. Notice that the order of the casts matters, because of
2286 // multiple inheritance.
2287 dw::Image *dwi = (dw::Image*)(dw::core::ImgRenderer*)Image->img_rndr;
2288 HT2TB(html)->addWidget(dwi, html->style());
2289 HT2TB(html)->addBreakOption (html->style (), false);
2290
2291 /* Image maps */
2292 if (a_Html_get_attr(html, tag, tagsize, "ismap")) {
2293 dwi->setIsMap();
2294 _MSG(" Html_tag_open_img: server-side map (ISMAP)\n");
2295 } else if (html->style ()->x_link != -1 &&
2296 usemap_url == NULL) {
2297 /* For simple links, we have to suppress the "image_pressed" signal.
2298 * This is overridden for USEMAP images. */
2299// a_Dw_widget_set_button_sensitive (IM2DW(Image->dw), FALSE);
2300 }
2301
2302 if (usemap_url) {
2303 dwi->setUseMap(&html->maps, new ::object::String(URL_STR(usemap_url)));
2304 a_Url_free (usemap_url);
2305 }
2306}
2307
2308/*
2309 * <map>
2310 */
2311static void Html_tag_content_map(DilloHtml *html, const char *tag, int tagsize)
2312{
2313 char *hash_name;
2314 const char *attrbuf;
2315 DilloUrl *url;
2316
2317 if (html->InFlags & IN_MAP) {
2318 BUG_MSG("Nested <map>.");
2319 } else {
2320 if ((attrbuf = a_Html_get_attr(html, tag, tagsize, "name"))) {
2321 html->InFlags |= IN_MAP;
2322 hash_name = dStrconcat("#", attrbuf, NULL);
2323 url = a_Html_url_new(html, hash_name, NULL, 0);
2324 html->maps.startNewMap(new ::object::String(URL_STR(url)));
2325 a_Url_free (url);
2326 dFree(hash_name);
2327 } else {
2328 BUG_MSG("<map> requires name attribute.");
2329 }
2330 }
2331}
2332
2337{
2338 /* This is a hack for the perhaps frivolous feature of drawing image map
2339 * shapes when there is no image to display. If this map is defined after
2340 * an image that has not been loaded (img != NULL), tell the image to
2341 * redraw. (It will only do so if it uses a map.)
2342 */
2343 for (int i = 0; i < html->images->size(); i++) {
2344 DilloImage *img = html->images->get(i)->image;
2345
2346 if (img) {
2347 // At this point, we know that img->ir represents an image
2348 // widget. (Really? Is this assumption safe?) Notice that the
2349 // order of the casts matters, because of multiple
2350 // inheritance.
2352 dwi->forceMapRedraw();
2353 }
2354 }
2355 html->InFlags &= ~IN_MAP;
2356}
2357
2361static
2363{
2364 int coord;
2365 const char *tail = str;
2366 char *newtail = NULL;
2368
2369 while (1) {
2370 coord = strtol(tail, &newtail, 10);
2371 if (coord == 0 && newtail == tail)
2372 break;
2373 coords->increase();
2374 coords->set(coords->size() - 1, coord);
2375 while (isspace(*newtail))
2376 newtail++;
2377 if (!*newtail)
2378 break;
2379 if (*newtail != ',') {
2380 BUG_MSG("<area> coords must be integers separated by commas.");
2381 }
2382 tail = newtail + 1;
2383 }
2384
2385 return coords;
2386}
2387
2388/*
2389 * <AREA>
2390 */
2391static void
2392 Html_tag_content_area(DilloHtml *html, const char *tag, int tagsize)
2393{
2394 enum types {UNKNOWN, RECTANGLE, CIRCLE, POLYGON, BACKGROUND};
2395 types type;
2396 misc::SimpleVector<int> *coords = NULL;
2397 DilloUrl* url;
2398 const char *attrbuf;
2399 int link = -1;
2400 Shape *shape = NULL;
2401
2402 if (!(html->InFlags & IN_MAP)) {
2403 BUG_MSG("<area> not inside <map>.");
2404 return;
2405 }
2406 attrbuf = a_Html_get_attr(html, tag, tagsize, "shape");
2407
2408 if (!attrbuf || !*attrbuf || !dStrAsciiCasecmp(attrbuf, "rect")) {
2409 /* the default shape is a rectangle */
2410 type = RECTANGLE;
2411 } else if (dStrAsciiCasecmp(attrbuf, "default") == 0) {
2412 /* "default" is the background */
2413 type = BACKGROUND;
2414 } else if (dStrAsciiCasecmp(attrbuf, "circle") == 0) {
2415 type = CIRCLE;
2416 } else if (dStrnAsciiCasecmp(attrbuf, "poly", 4) == 0) {
2417 type = POLYGON;
2418 } else {
2419 BUG_MSG("<area> unknown shape: '%s'.", attrbuf);
2420 type = UNKNOWN;
2421 }
2422 if (type == RECTANGLE || type == CIRCLE || type == POLYGON) {
2423 if ((attrbuf = a_Html_get_attr(html, tag, tagsize, "coords"))) {
2424 coords = Html_read_coords(html, attrbuf);
2425
2426 if (type == RECTANGLE) {
2427 if (coords->size() != 4)
2428 BUG_MSG("<area> rectangle must have four coordinate values.");
2429 if (coords->size() >= 4)
2430 shape = new Rectangle(coords->get(0),
2431 coords->get(1),
2432 coords->get(2) - coords->get(0),
2433 coords->get(3) - coords->get(1));
2434 } else if (type == CIRCLE) {
2435 if (coords->size() != 3)
2436 BUG_MSG("<area> circle must have three coordinate values.");
2437 if (coords->size() >= 3)
2438 shape = new Circle(coords->get(0), coords->get(1),
2439 coords->get(2));
2440 } else if (type == POLYGON) {
2441 Polygon *poly;
2442 int i;
2443 if (coords->size() % 2)
2444 BUG_MSG("<area> polygon with odd number of coordinates.");
2445 shape = poly = new Polygon();
2446 for (i = 0; i < (coords->size() / 2); i++)
2447 poly->addPoint(coords->get(2*i), coords->get(2*i + 1));
2448 }
2449 delete(coords);
2450 }
2451 }
2452 if (shape != NULL || type == BACKGROUND) {
2453 if ((attrbuf = a_Html_get_attr(html, tag, tagsize, "href"))) {
2454 url = a_Html_url_new(html, attrbuf, NULL, 0);
2455 dReturn_if_fail ( url != NULL );
2456
2457 link = Html_set_new_link(html, &url);
2458 }
2459 if (type == BACKGROUND)
2460 html->maps.setCurrentMapDefaultLink(link);
2461 else
2462 html->maps.addShapeToCurrentMap(shape, link);
2463 }
2464}
2465
2466/*
2467 * <OBJECT>
2468 * Simply provide a link if the object is something downloadable.
2469 */
2470static void Html_tag_open_object(DilloHtml *html, const char *tag, int tagsize)
2471{
2472 DilloUrl *url, *base_url = NULL;
2473 const char *attrbuf;
2474
2475 if ((attrbuf = a_Html_get_attr(html, tag, tagsize, "codebase"))) {
2476 base_url = a_Html_url_new(html, attrbuf, NULL, 0);
2477 }
2478
2479 if ((attrbuf = a_Html_get_attr(html, tag, tagsize, "data"))) {
2480 url = a_Html_url_new(html, attrbuf,
2481 URL_STR(base_url), (base_url != NULL));
2482 dReturn_if_fail ( url != NULL );
2483
2485 html->styleEngine->setPseudoVisited ();
2486 } else {
2487 html->styleEngine->setPseudoLink ();
2488 }
2489
2491 Html_set_new_link(html, &url));
2492 }
2494}
2495
2496static void Html_tag_content_object(DilloHtml *html, const char *tag,
2497 int tagsize)
2498{
2499 if (a_Html_get_attr(html, tag, tagsize, "data"))
2500 HT2TB(html)->addText("[OBJECT]", html->wordStyle ());
2501}
2502
2503/*
2504 * <VIDEO>
2505 * Provide a link to the video.
2506 */
2507static void Html_tag_open_video(DilloHtml *html, const char *tag, int tagsize)
2508{
2509 DilloUrl *url;
2510 const char *attrbuf;
2511
2512 if (html->InFlags & IN_MEDIA) {
2513 MSG("<video> not handled when already inside a media element.\n");
2514 return;
2515 }
2516 /* TODO: poster attr */
2517
2518 if ((attrbuf = a_Html_get_attr(html, tag, tagsize, "src"))) {
2519 url = a_Html_url_new(html, attrbuf, NULL, 0);
2520 dReturn_if_fail ( url != NULL );
2521
2523 html->styleEngine->setPseudoVisited ();
2524 } else {
2525 html->styleEngine->setPseudoLink ();
2526 }
2527
2529 Html_set_new_link(html, &url));
2530
2531 HT2TB(html)->addText("[VIDEO]", html->wordStyle ());
2532 }
2533 html->InFlags |= IN_MEDIA;
2534}
2535
2536/*
2537 * <AUDIO>
2538 * Provide a link to the audio.
2539 */
2540static void Html_tag_open_audio(DilloHtml *html, const char *tag, int tagsize)
2541{
2542 DilloUrl *url;
2543 const char *attrbuf;
2544
2545 if (html->InFlags & IN_MEDIA) {
2546 MSG("<audio> not handled when already inside a media element.\n");
2547 return;
2548 }
2549
2550 if ((attrbuf = a_Html_get_attr(html, tag, tagsize, "src"))) {
2551 url = a_Html_url_new(html, attrbuf, NULL, 0);
2552 dReturn_if_fail ( url != NULL );
2553
2555 html->styleEngine->setPseudoVisited ();
2556 } else {
2557 html->styleEngine->setPseudoLink ();
2558 }
2559
2561 Html_set_new_link(html, &url));
2562
2563 HT2TB(html)->addText("[AUDIO]", html->wordStyle ());
2564 }
2565 html->InFlags |= IN_MEDIA;
2566}
2567
2568/*
2569 * <SOURCE>
2570 * Media resource; provide a link to its address.
2571 */
2572static void Html_tag_open_source(DilloHtml *html, const char *tag,
2573 int tagsize)
2574{
2575 const char *attrbuf;
2576
2577 if (!(html->InFlags & IN_MEDIA)) {
2578 // Can also be inside a picture element.
2579 // BUG_MSG("<source> not inside a media element.");
2580 return;
2581 }
2582 if (!(attrbuf = a_Html_get_attr(html, tag, tagsize, "src"))) {
2583 BUG_MSG("<source> requires src attribute.");
2584 return;
2585 } else {
2586 DilloUrl *url = a_Html_url_new(html, attrbuf, NULL, 0);
2587
2588 dReturn_if_fail ( url != NULL );
2589
2591 html->styleEngine->setPseudoVisited ();
2592 } else {
2593 html->styleEngine->setPseudoLink ();
2594 }
2596 Html_set_new_link(html, &url));
2597 }
2598}
2599
2600static void Html_tag_content_source(DilloHtml *html, const char *tag,
2601 int tagsize)
2602{
2603 if ((html->InFlags & IN_MEDIA) && a_Html_get_attr(html, tag, tagsize,"src"))
2604 HT2TB(html)->addText("[MEDIA SOURCE]", html->wordStyle ());
2605}
2606
2611{
2612 html->InFlags &= ~IN_MEDIA;
2613}
2614
2615/*
2616 * <EMBED>
2617 * Provide a link to embedded content.
2618 */
/*
 * Open handler for <embed>: when a "src" attribute is present, builds a
 * URL from it and registers it through Html_set_new_link(). Without
 * "src" nothing happens.
 *
 * NOTE(review): source lines 2628 and 2634 are missing from this listing
 * (the embedded numbering skips them). The `if` that pairs with the
 * `} else {` below and the head of the call that ends in
 * "Html_set_new_link(html, &url));" must be restored from the upstream
 * file.
 */
2619static void Html_tag_open_embed(DilloHtml *html, const char *tag, int tagsize)
2620{
2621 const char *attrbuf;
2622
2623 if ((attrbuf = a_Html_get_attr(html, tag, tagsize, "src"))) {
2624 DilloUrl *url = a_Html_url_new(html, attrbuf, NULL, 0);
2625
2626 dReturn_if_fail ( url != NULL );
2627
/* NOTE(review): line 2628 (the opening `if (...) {`) is missing here. */
2629 html->styleEngine->setPseudoVisited ();
2630 } else {
2631 html->styleEngine->setPseudoLink ();
2632 }
2633
/* NOTE(review): line 2634 (start of the statement below) is missing. */
2635 Html_set_new_link(html, &url));
2636 }
2637}
2638
2639static void Html_tag_content_embed(DilloHtml *html,const char *tag,int tagsize)
2640{
2641 if (a_Html_get_attr(html, tag, tagsize, "src"))
2642 HT2TB(html)->addText("[EMBED]", html->wordStyle ());
2643}
2644
2648static const char* Html_get_javascript_link(DilloHtml *html)
2649{
2650 size_t i;
2651 char ch, *p1, *p2;
2652 Dstr *Buf = html->attr_data;
2653
2654 if (dStrnAsciiCasecmp("javascript", Buf->str, 10) == 0) {
2655 i = strcspn(Buf->str, "'\"");
2656 ch = Buf->str[i];
2657 if ((ch == '"' || ch == '\'') &&
2658 (p2 = strchr(Buf->str + i + 1 , ch))) {
2659 p1 = Buf->str + i;
2660 BUG_MSG("Link depends on javascript().");
2661 dStr_truncate(Buf, p2 - Buf->str);
2662 dStr_erase(Buf, 0, p1 - Buf->str + 1);
2663 }
2664 }
2665 return Buf->str;
2666}
2667
2671static void Html_add_anchor(DilloHtml *html, const char *name)
2672{
2673 _MSG("Registering ANCHOR: %s\n", name);
2674 if (!HT2TB(html)->addAnchor (name, html->style ()))
2675 BUG_MSG("Anchor names must be unique within the document (\"%s\").",
2676 name);
2677 /*
2678 * According to Sec. 12.2.1 of the HTML 4.01 spec, "anchor names that
2679 * differ only in case may not appear in the same document", but
2680 * "comparisons between fragment identifiers and anchor names must be
2681 * done by exact (case-sensitive) match." We ignore the case issue and
2682 * always test for exact matches. Moreover, what does uppercase mean
2683 * for Unicode characters outside the ASCII range?
2684 */
2685}
2686
2687/*
2688 * <A>
2689 */
/*
 * Open handler for <a>. Sets IN_A, forwards to Html_tag_content_area()
 * when inside a <map>, turns an "href" attribute into a link (quoted
 * javascript references are unwrapped first by
 * Html_get_javascript_link()), and registers a "name" attribute as an
 * anchor unless it equals the element's id.
 *
 * NOTE(review): source lines 2708, 2712-2713, 2718-2719, 2723, 2728,
 * 2732 and 2738 are missing from this listing (the embedded numbering
 * skips them), so several statements below are visibly truncated.
 * Restore them from the upstream file before building.
 */
2690static void Html_tag_open_a(DilloHtml *html, const char *tag, int tagsize)
2691{
2692 DilloUrl *url;
2693 const char *attrbuf;
2694
2695 /* TODO: add support for MAP with A HREF */
2696 html->InFlags |= IN_A;
2697 if (html->InFlags & IN_MAP)
2698 Html_tag_content_area(html, tag, tagsize);
2699
2700 if ((attrbuf = a_Html_get_attr(html, tag, tagsize, "href"))) {
2701 /* if it's a javascript link, extract the reference. */
2702 if (D_ASCII_TOLOWER(attrbuf[0]) == 'j')
2703 attrbuf = Html_get_javascript_link(html);
2704
2705 url = a_Html_url_new(html, attrbuf, NULL, 0);
2706 dReturn_if_fail ( url != NULL );
2707
/* NOTE(review): line 2708 (the opening `if (...) {`) is missing here. */
2709 html->InVisitedLink = true;
2710 html->styleEngine->setPseudoVisited ();
2711 if (html->non_css_visited_color != -1)
/* NOTE(review): lines 2712-2713 (start of this statement) are missing. */
2714 html->non_css_visited_color);
2715 } else {
2716 html->styleEngine->setPseudoLink ();
2717 if (html->non_css_link_color != -1)
/* NOTE(review): lines 2718-2719 (start of this statement) are missing. */
2720 html->non_css_link_color);
2721 }
2722
/* NOTE(review): line 2723 (start of the statement below) is missing. */
2724 Html_set_new_link(html, &url));
2725 }
2726 if (prefs.show_tooltip &&
2727 (attrbuf = a_Html_get_attr(html, tag, tagsize, "title"))) {
/* NOTE(review): line 2728 (start of the statement below) is missing. */
2729 attrbuf);
2730 }
2731
/* NOTE(review): line 2732 is missing here. */
2733
2734 if ((attrbuf = a_Html_get_attr(html, tag, tagsize, "name"))) {
2735 char *nameVal;
2736 const char *id = html->styleEngine->getId ();
2737
/* NOTE(review): line 2738 is missing here. */
2739 Html_check_name_val(html, attrbuf, "name");
2740
2741 nameVal = a_Url_decode_hex_str(attrbuf);
2742
2743 if (nameVal) {
2744 /* We compare the "id" value with the url-decoded "name" value */
2745 if (!id || strcmp(nameVal, id)) {
2746 if (id)
2747 BUG_MSG("In <a>, id ('%s') and name ('%s') attributes differ.",
2748 id, nameVal);
2749 Html_add_anchor(html, nameVal);
2750 }
2751
/* nameVal is released here with dFree() once it has been used. */
2752 dFree(nameVal);
2753 }
2754 }
2755}
2756
2757/*
2758 * <A> close function
2759 */
2760static void Html_tag_close_a(DilloHtml *html)
2761{
2762 html->InFlags &= ~IN_A;
2763 html->InVisitedLink = false;
2764}
2765
2766/*
2767 * <BLOCKQUOTE>
2768 */
2770 const char *tag, int tagsize)
2771{
/*
 * NOTE(review): the first line of this definition (source line 2769,
 * holding the return type and the function name) is missing from the
 * listing. Presumably this is Html_tag_open_blockquote, as named in the
 * Tags table -- confirm against the upstream file.
 *
 * The body makes the same Html_add_textblock() call as the <dd> open
 * handler.
 */
2772 Html_add_textblock(html, true, 9, false);
2773}
2774
2775/*
2776 * <Q>
2777 */
/*
 * Open handler for <q>: adds an opening quotation mark (the UTF-8
 * encoding of U+201C) as text in the current word style.
 *
 * NOTE(review): source line 2786 is missing from this listing (the
 * embedded numbering skips it); restore it from the upstream file.
 */
2778static void Html_tag_open_q(DilloHtml *html, const char *tag, int tagsize)
2779{
2780 /*
2781 * Left Double Quotation Mark, which is wrong in many cases, but
2782 * should at least be widely recognized.
2783 */
2784 const char *U201C = "\xe2\x80\x9c";
2785
/* NOTE(review): line 2786 is missing here. */
2787 HT2TB(html)->addText (U201C, html->wordStyle ());
2788}
2789
2790/*
2791 * </Q>
2792 */
2793static void Html_tag_close_q(DilloHtml *html)
2794{
2795 /* Right Double Quotation Mark */
2796 const char *U201D = "\xe2\x80\x9d";
2797
2798 HT2TB(html)->addText (U201D, html->wordStyle ());
2799}
2800
2801/*
2802 * Handle the <UL> tag.
2803 */
/*
 * Open handler for <ul>. Maps a "type" attribute of "disc", "circle" or
 * "square" (ASCII case-insensitive) to the matching list style, falling
 * back to the disc style for any other value, and then initialises the
 * list state kept on the top stack entry.
 *
 * NOTE(review): source line 2822 is missing from this listing (the
 * embedded numbering skips it); it holds the head of the call whose
 * tail "CSS_TYPE_ENUM, list_style_type);" is visible below.
 */
2804static void Html_tag_open_ul(DilloHtml *html, const char *tag, int tagsize)
2805{
2806 const char *attrbuf;
2807 ListStyleType list_style_type;
2808
2809 if ((attrbuf = a_Html_get_attr(html, tag, tagsize, "type"))) {
2810
2811 /* list_style_type explicitly defined */
2812 if (dStrAsciiCasecmp(attrbuf, "disc") == 0)
2813 list_style_type = LIST_STYLE_TYPE_DISC;
2814 else if (dStrAsciiCasecmp(attrbuf, "circle") == 0)
2815 list_style_type = LIST_STYLE_TYPE_CIRCLE;
2816 else if (dStrAsciiCasecmp(attrbuf, "square") == 0)
2817 list_style_type = LIST_STYLE_TYPE_SQUARE;
2818 else
2819 /* invalid value */
2820 list_style_type = LIST_STYLE_TYPE_DISC;
2821
/* NOTE(review): line 2822 (start of the statement below) is missing. */
2823 CSS_TYPE_ENUM, list_style_type);
2824 if (html->DocType == DT_HTML && html->DocTypeVersion >= 5.0f)
2825 BUG_MSG("<ul> type attribute is obsolete.");
2826 }
2827
/* Per-list state: unordered, counter at 0, no reference item yet. */
2828 S_TOP(html)->list_type = HTML_LIST_UNORDERED;
2829 S_TOP(html)->list_number = 0;
2830 S_TOP(html)->ref_list_item = NULL;
2831}
2832
2833/*
2834 * Handle the <DIR> or <MENU> tag.
2835 * (Deprecated and almost the same as <UL>)
2836 */
/*
 * Open handler for <dir> (also reached from Html_tag_open_menu()): adds
 * a paragraph break, initialises the same unordered-list state as <ul>
 * and reports the element as obsolete.
 *
 * NOTE(review): source lines 2839 and 2846 are missing from this listing
 * (the embedded numbering skips them). Whether the final BUG_MSG is
 * conditional cannot be told from here; restore both lines from the
 * upstream file.
 */
2837static void Html_tag_open_dir(DilloHtml *html, const char *tag, int tagsize)
2838{
/* NOTE(review): line 2839 is missing here. */
2840 HT2TB(html)->addParbreak (9, html->wordStyle ());
2841
2842 S_TOP(html)->list_type = HTML_LIST_UNORDERED;
2843 S_TOP(html)->list_number = 0;
2844 S_TOP(html)->ref_list_item = NULL;
2845
/* NOTE(review): line 2846 is missing here. */
2847 BUG_MSG("Obsolete list type; use <ul> instead.");
2848}
2849
2850/*
2851 * Handle the <MENU> tag.
2852 */
2853static void Html_tag_open_menu(DilloHtml *html, const char *tag, int tagsize)
2854{
2855 /* In another bit of ridiculous mess from the HTML5 world, the menu
2856 * element, which was deprecated in HTML4:
2857 * - does not appear at all in W3C's HTML5 spec
2858 * - appears in WHATWG's HTML5 doc and the W3C's 5.1 draft, where it
2859 * means something totally different than it did in the old days
2860 * (now it's for popup menus and toolbar menus rather than being a
2861 * sort of list).
2862 */
2863 if (!(html->DocType == DT_HTML && html->DocTypeVersion >= 5.0f))
2864 Html_tag_open_dir(html, tag, tagsize);
2865}
2866
2867/*
2868 * Handle the <OL> tag.
2869 */
/*
 * Open handler for <ol>. The first character of a "type" attribute
 * selects the numbering style ('1', 'a', 'A', 'i' or 'I'; anything else
 * keeps decimal). A "start" attribute sets the initial list number;
 * negative values are reported and replaced by 0. Without "start" the
 * list begins at 1.
 *
 * NOTE(review): source line 2889 is missing from this listing (the
 * embedded numbering skips it); it holds the head of the call whose
 * tail "CSS_TYPE_ENUM, listStyleType);" is visible below.
 */
2870static void Html_tag_open_ol(DilloHtml *html, const char *tag, int tagsize)
2871{
2872 const char *attrbuf;
2873 int n = 1;
2874
2875 if ((attrbuf = a_Html_get_attr(html, tag, tagsize, "type"))) {
2876 ListStyleType listStyleType = LIST_STYLE_TYPE_DECIMAL;
2877
2878 if (*attrbuf == '1')
2879 listStyleType = LIST_STYLE_TYPE_DECIMAL;
2880 else if (*attrbuf == 'a')
2881 listStyleType = LIST_STYLE_TYPE_LOWER_ALPHA;
2882 else if (*attrbuf == 'A')
2883 listStyleType = LIST_STYLE_TYPE_UPPER_ALPHA;
2884 else if (*attrbuf == 'i')
2885 listStyleType = LIST_STYLE_TYPE_LOWER_ROMAN;
2886 else if (*attrbuf == 'I')
2887 listStyleType = LIST_STYLE_TYPE_UPPER_ROMAN;
2888
/* NOTE(review): line 2889 (start of the statement below) is missing. */
2890 CSS_TYPE_ENUM, listStyleType);
2891 }
2892
2893 S_TOP(html)->list_type = HTML_LIST_ORDERED;
2894
/* n is overwritten only when "start" is present; strtol() yields 0 for
 * text that does not begin with a number. */
2895 if ((attrbuf = a_Html_get_attr(html, tag, tagsize, "start")) &&
2896 (n = (int) strtol(attrbuf, NULL, 10)) < 0) {
2897 BUG_MSG("Illegal '-' character in START attribute; Starting from 0.");
2898 n = 0;
2899 }
2900 S_TOP(html)->list_number = n;
2901 S_TOP(html)->ref_list_item = NULL;
2902}
2903
2904/*
2905 * Handle the <LI> tag.
2906 */
/*
 * Open handler for <li>. Reports an <li> outside any list (except in
 * HTML5 documents), sets IN_LI and, through a "value" attribute, lets
 * the item overwrite the list number stored on the parent stack entry;
 * negative values are reported and replaced by 0.
 *
 * NOTE(review): source line 2926 is missing from this listing (the
 * embedded numbering skips it). It presumably opens the block that the
 * "// ordered" comment and the unmatched closing brace at 2933 belong
 * to, and is presumably where the local `style` is used -- confirm
 * against the upstream file.
 */
2907static void Html_tag_open_li(DilloHtml *html, const char *tag, int tagsize)
2908{
2909 Style *style = html->style ();
2910 int *list_number;
2911 const char *attrbuf;
2912
2913 if (S_TOP(html)->list_type == HTML_LIST_NONE &&
2914 !(html->DocType == DT_HTML && html->DocTypeVersion >= 5.0f)) {
2915 /* In WHATWG's HTML5 and W3C's HTML 5.1, LI can appear within MENUs
2916 * of the toolbar type.
2917 */
2918 BUG_MSG("<li> outside <ul> or <ol>.");
2919 }
2920
2921 html->InFlags |= IN_LI;
2922
2923 /* Get our parent tag's variables (used as state storage) */
2924 list_number = &html->stack->getRef(html->stack->size()-2)->list_number;
2925
/* NOTE(review): line 2926 (the opening `if (...) {`) is missing here. */
2927 // ordered
2928 if ((attrbuf = a_Html_get_attr(html, tag, tagsize, "value")) &&
2929 (*list_number = strtol(attrbuf, NULL, 10)) < 0) {
2930 BUG_MSG("Illegal negative list value attribute; Starting from 0.");
2931 *list_number = 0;
2932 }
2933 }
2934}
2935
2936/*
2937 * Close <LI>.
2938 */
2940{
/*
 * NOTE(review): the signature of this function (source line 2939) is
 * missing from the listing. Presumably this is Html_tag_close_li, as
 * named in the Tags table -- confirm against the upstream file.
 *
 * The body clears IN_LI and calls flush() on html->dw, which is cast to
 * a ListItem.
 */
2941 html->InFlags &= ~IN_LI;
2942 ((ListItem *)html->dw)->flush ();
2943}
2944
2945/*
2946 * <HR>
2947 */
/*
 * Open handler for <hr>. Reads the presentational attributes "width",
 * "size", "align" (through a_Html_tag_set_align_attr()) and "noshade",
 * reporting the ones that are obsolete in HTML5. A positive size is
 * split into a top part of (size+1)/2 pixels and a bottom part of
 * size/2 pixels.
 *
 * NOTE(review): source lines 2958-2959, 2976-2983, 2992, 2994, 2996-2997
 * and 2999-3000 are missing from this listing (the embedded numbering
 * skips them). Only the tails of the statements that consume the width
 * and the two lengths are visible, and part of the "noshade" branch is
 * gone. Restore them from the upstream file before building.
 */
2948static void Html_tag_open_hr(DilloHtml *html, const char *tag, int tagsize)
2949{
2950 char *width_ptr;
2951 const char *attrbuf;
2952 int32_t size = 0;
2953
/* width_ptr is released with dFree() below once it has been parsed. */
2954 width_ptr = a_Html_get_attr_wdef(html, tag, tagsize, "width", NULL);
2955 if (width_ptr) {
2956 if (html->DocType == DT_HTML && html->DocTypeVersion >= 5.0f)
2957 BUG_MSG("<hr> width attribute is obsolete.");
/* NOTE(review): lines 2958-2959 (start of this statement) are missing. */
2960 a_Html_parse_length (html, width_ptr));
2961 dFree(width_ptr);
2962 }
2963
2964 if ((attrbuf = a_Html_get_attr(html, tag, tagsize, "size"))) {
2965 size = strtol(attrbuf, NULL, 10);
2966 if (html->DocType == DT_HTML && html->DocTypeVersion >= 5.0f)
2967 BUG_MSG("<hr> size attribute is obsolete.");
2968 }
2969
2970 a_Html_tag_set_align_attr(html, tag, tagsize);
2971
2972 /* TODO: evaluate attribute */
2973 if (a_Html_get_attr(html, tag, tagsize, "noshade")) {
2974 if (html->DocType == DT_HTML && html->DocTypeVersion >= 5.0f)
2975 BUG_MSG("<hr> noshade attribute is obsolete.");
/* NOTE(review): lines 2976-2983 are missing here. */
2984
/* With "noshade" the ruler is at least one pixel thick. */
2985 if (size <= 0)
2986 size = 1;
2987 }
2988
2989 if (size > 0) {
/* An odd size gives the extra pixel to the top half. */
2990 CssLength size_top = CSS_CREATE_LENGTH ((size+1)/2, CSS_LENGTH_TYPE_PX);
2991 CssLength size_bottom = CSS_CREATE_LENGTH (size / 2, CSS_LENGTH_TYPE_PX);
/* NOTE(review): line 2992 (start of the statement below) is missing. */
2993 CSS_TYPE_LENGTH_PERCENTAGE, size_top);
/* NOTE(review): line 2994 (start of the statement below) is missing. */
2995 CSS_TYPE_LENGTH_PERCENTAGE, size_top);
/* NOTE(review): lines 2996-2997 (start of this statement) are missing. */
2998 size_bottom);
/* NOTE(review): lines 2999-3000 (start of this statement) are missing. */
3001 size_bottom);
3002 }
3003
3004}
3005
3006static void Html_tag_content_hr(DilloHtml *html, const char *tag, int tagsize)
3007{
3008 Widget *hruler;
3009 HT2TB(html)->addParbreak (5, html->wordStyle ());
3010
3011 hruler = new Ruler();
3012 hruler->setStyle (html->style ());
3013 HT2TB(html)->addWidget (hruler, html->style ());
3014 HT2TB(html)->addParbreak (5, html->wordStyle ());
3015}
3016
3017/*
3018 * <DL>
3019 */
/*
 * Open handler for <dl>: adds a paragraph break in the current word
 * style.
 *
 * NOTE(review): source line 3023 is missing from this listing (the
 * embedded numbering skips it); restore it from the upstream file.
 */
3020static void Html_tag_open_dl(DilloHtml *html, const char *tag, int tagsize)
3021{
3022 /* may want to actually do some stuff here. */
/* NOTE(review): line 3023 is missing here. */
3024 HT2TB(html)->addParbreak (9, html->wordStyle ());
3025}
3026
3027/*
3028 * <DT>
3029 */
/*
 * Open handler for <dt>: adds a paragraph break in the current word
 * style.
 *
 * NOTE(review): source line 3032 is missing from this listing (the
 * embedded numbering skips it); restore it from the upstream file.
 */
3030static void Html_tag_open_dt(DilloHtml *html, const char *tag, int tagsize)
3031{
/* NOTE(review): line 3032 is missing here. */
3033 HT2TB(html)->addParbreak (9, html->wordStyle ());
3034}
3035
3036/*
3037 * <DD>
3038 */
3039static void Html_tag_open_dd(DilloHtml *html, const char *tag, int tagsize)
3040{
3041 Html_add_textblock(html, true, 9, false);
3042}
3043
3044/*
3045 * <PRE>
3046 */
/*
 * Open handler for <pre>: adds a paragraph break and sets IN_PRE.
 *
 * NOTE(review): source line 3049 is missing from this listing (the
 * embedded numbering skips it); restore it from the upstream file.
 */
3047static void Html_tag_open_pre(DilloHtml *html, const char *tag, int tagsize)
3048{
/* NOTE(review): line 3049 is missing here. */
3050 HT2TB(html)->addParbreak (9, html->wordStyle ());
3051
3052 html->InFlags |= IN_PRE;
3053}
3054
3055/*
3056 * Custom close for <PRE>
3057 */
3059{
/*
 * NOTE(review): the signature of this function (source line 3058) is
 * missing from the listing. Presumably this is Html_tag_close_pre, as
 * named in the Tags table -- confirm against the upstream file.
 *
 * The body clears the IN_PRE flag that the <pre> open handler sets.
 */
3060 html->InFlags &= ~IN_PRE;
3061}
3062
3067static int Html_tag_pre_excludes(DilloHtml *html, int tag_idx)
3068{
3069 if (!(html->DocType == DT_HTML && html->DocTypeVersion >= 5.0f)) {
3070 /* HTML5 doesn't say anything about excluding elements */
3071 const char *es_set[] = {"img", "object", "applet", "big", "small", "sub",
3072 "sup", "font", "basefont", NULL};
3073 static int ei_set[10], i;
3074
3075 /* initialize array */
3076 if (!ei_set[0])
3077 for (i = 0; es_set[i]; ++i)
3078 ei_set[i] = a_Html_tag_index(es_set[i]);
3079
3080 for (i = 0; ei_set[i]; ++i)
3081 if (tag_idx == ei_set[i])
3082 return 1;
3083 }
3084 return 0;
3085}
3086
3090static void Html_update_content_type(DilloHtml *html, const char *content)
3091{
3092 const char *new_content = a_Capi_set_content_type(html->page_url, content,
3093 "meta");
3094 /* Cannot ask cache whether the content type was changed, as
3095 * this code in another bw might have already changed it for us.
3096 */
3097 if (a_Misc_content_type_cmp(html->content_type, new_content)) {
3098 html->stop_parser = true; /* The cache buffer is no longer valid */
3099 a_UIcmd_repush(html->bw);
3100 }
3101}
3102
3116static void Html_tag_open_meta(DilloHtml *html, const char *tag, int tagsize)
3117{
3118 const char meta_template[] =
3119"<table width='100%%'><tr><td bgcolor='#ee0000'>Warning:</td>\n"
3120" <td bgcolor='#8899aa' width='100%%'>\n"
3121" This page uses the NON-STANDARD meta refresh tag.<br> The HTML 4.01 SPEC\n"
3122" (sec 7.4.4) recommends explicitly to avoid it.</td></tr>\n"
3123" <tr><td bgcolor='#a0a0a0' colspan='2'>The author wanted you to go\n"
3124" <a href='%s'>here</a>%s</td></tr></table><br>\n";
3125
3126 const char *p, *equiv, *charset, *content;
3127 char delay_str[64], *mr_url;
3128 DilloUrl *new_url;
3129 int delay;
3130
3131 /* only valid inside HEAD */
3132 if (!(html->InFlags & IN_HEAD)) {
3133 if (!((html->DocType == DT_HTML && html->DocTypeVersion >= 5.0f) &&
3134 a_Html_get_attr(html, tag, tagsize, "itemprop"))) {
3135 /* With the HTML 5.1 draft spec, meta with itemprop may appear
3136 * in the body.
3137 */
3138 BUG_MSG("This <meta> element must be inside the HEAD section.");
3139 }
3140 return;
3141 }
3142
3143 if ((equiv = a_Html_get_attr(html, tag, tagsize, "http-equiv"))) {
3144 if (!dStrAsciiCasecmp(equiv, "refresh") &&
3145 (content = a_Html_get_attr(html, tag, tagsize, "content"))) {
3146
3147 /* Get delay, if present, and make a message with it */
3148 if ((delay = strtol(content, NULL, 0))) {
3149 snprintf(delay_str, 64, " after %d second%s.",
3150 delay, (delay > 1) ? "s" : "");
3151 } else {
3152 sprintf(delay_str, ".");
3153 }
3154 /* Skip to anything after "URL=" or ";" if "URL=" is not found */
3155 int has_url = 1;
3156 if ((p = dStriAsciiStr(content, "url=")))
3157 content = p + strlen("url=");
3158 else if ((p = strstr(content, ";")))
3159 content = p + strlen(";");
3160 else
3161 has_url = 0;
3162
3163 if (has_url) {
3164 /* Handle the case of a quoted URL */
3165 if (*content == '"' || *content == '\'') {
3166 if ((p = strchr(content + 1, *content)))
3167 mr_url = dStrndup(content + 1, p - content - 1);
3168 else
3169 mr_url = dStrdup(content + 1);
3170 } else {
3171 mr_url = dStrdup(content);
3172 }
3173 } else {
3174 mr_url = dStrdup("");
3175 }
3176
3177 new_url = a_Html_url_new(html, mr_url, NULL, 0);
3178
3179 if (a_Url_cmp(html->base_url, new_url) == 0) {
3180 /* redirection loop, or empty url string: ignore */
3181 BUG_MSG("<meta> refresh: %s.",
3182 *mr_url ? "redirection loop" : "no target URL");
3183 } else if (delay == 0) {
3184 /* zero-delay redirection */
3185 html->stop_parser = true;
3186 if (URL_FLAGS(html->base_url) & URL_SpamSafe) {
3187 a_UIcmd_set_msg(html->bw,
3188 "WARNING: local URL with META refresh. Aborting.");
3189 } else if (a_Capi_dpi_verify_request(html->bw, new_url)) {
3190 a_UIcmd_redirection0((void*)html->bw, new_url);
3191 }
3192 } else {
3193 /* Send a custom HTML message.
3194 * TODO: This is a hairy hack,
3195 * It'd be much better to build a widget. */
3196 Dstr *ds_msg = dStr_sized_new(256);
3197 dStr_sprintf(ds_msg, meta_template, URL_STR(new_url), delay_str);
3198 {
3199 int o_InFlags = html->InFlags;
3200 int o_TagSoup = html->TagSoup;
3201 html->InFlags = IN_BODY + IN_META_HACK;
3202 html->TagSoup = false;
3203 Html_write_raw(html, ds_msg->str, ds_msg->len, 0);
3204 html->TagSoup = o_TagSoup;
3205 html->InFlags = o_InFlags;
3206 }
3207 dStr_free(ds_msg, 1);
3208 }
3209 a_Url_free(new_url);
3210 dFree(mr_url);
3211
3212 } else if (!dStrAsciiCasecmp(equiv, "content-type") &&
3213 (content = a_Html_get_attr(html, tag, tagsize, "content"))) {
3214 _MSG("Html_tag_open_meta: content={%s}\n", content);
3215 Html_update_content_type(html, content);
3216 }
3217 } else if (html->DocType == DT_HTML && html->DocTypeVersion == 5.0f &&
3218 (charset = a_Html_get_attr(html, tag, tagsize, "charset"))) {
3219 char *content = dStrconcat("text/html; charset=", charset, NULL);
3220
3221 Html_update_content_type(html, content);
3222 dFree(content);
3223 }
3224}
3225
/*
 * Cache callback used for stylesheet requests (it is the callback that
 * the stylesheet loader in this file passes to a_Capi_open_url()). On a
 * non-zero 'Op' it decrements the pending-stylesheet counter of the
 * browser window that owns the request.
 *
 * NOTE(review): source line 3236 is missing from this listing (the
 * embedded numbering skips it); it holds the statement guarded by the
 * `if` at 3235 ("Repush when we've got them all"). Restore it from the
 * upstream file.
 */
3229static void Html_css_load_callback(int Op, CacheClient_t *Client)
3230{
3231 _MSG("Html_css_load_callback: Op=%d\n", Op);
3232 if (Op) { /* EOF */
3233 BrowserWindow *bw = ((DilloWeb *)Client->Web)->bw;
3234 /* Repush when we've got them all */
3235 if (--bw->NumPendingStyleSheets == 0)
/* NOTE(review): line 3236 (the guarded statement) is missing here. */
3237 }
3238}
3239
3244{
/*
 * NOTE(review): the signature of this function (source line 3243) is
 * missing from the listing. The trace message below calls it
 * "Html_load_stylesheet", and the body uses two names that are not
 * declared here, `html` and `url`, which are presumably its parameters
 * -- confirm against the upstream file. Lines 3251 and 3265-3266 are
 * missing as well.
 *
 * Visible behaviour: nothing is done when `url` is NULL or
 * prefs.load_stylesheets is off. In the first branch the buffer obtained
 * with a_Capi_get_buf() is parsed as an author stylesheet and then
 * released. Otherwise a cache request flagged WEB_Stylesheet is opened
 * and counted in NumPendingStyleSheets.
 */
3245 char *data;
3246 int len;
3247
3248 dReturn_if (url == NULL || ! prefs.load_stylesheets);
3249
3250 _MSG("Html_load_stylesheet: ");
/* NOTE(review): line 3251 (start of the `if` condition) is missing. */
3252 a_Capi_get_buf(url, &data, &len)) {
3253 _MSG("cached URL=%s len=%d", URL_STR(url), len);
/* A leading @charset "..." rule names the stylesheet's encoding. */
3254 if (strncmp("@charset \"", data, 10) == 0) {
3255 char *endq = strchr(data+10, '"');
3256
3257 if (endq && (endq - data <= 51)) {
3258 /* IANA limits charset names to 40 characters */
3259 char *content_type;
3260
/* Terminate the buffer at the closing quote just long enough to copy
 * the charset name, then put the quote back. */
3261 *endq = '\0';
3262 content_type = dStrconcat("text/css; charset=", data+10, NULL);
3263 *endq = '"';
3264 a_Capi_unref_buf(url);
/* NOTE(review): lines 3265-3266 are missing here; content_type is
 * neither used nor freed in the visible code, presumably that happens
 * on the missing lines -- confirm. */
3267 a_Capi_get_buf(url, &data, &len);
3268 }
3269 }
3270 html->styleEngine->parse(html, url, data, len, CSS_ORIGIN_AUTHOR);
3271 a_Capi_unref_buf(url);
3272 } else {
3273 /* Fill a Web structure for the cache query */
3274 int ClientKey;
3275 DilloWeb *Web = a_Web_new(html->bw, url, html->page_url);
3276 Web->flags |= WEB_Stylesheet;
3277 if ((ClientKey = a_Capi_open_url(Web, Html_css_load_callback, NULL))) {
3278 ++html->bw->NumPendingStyleSheets;
3279 a_Bw_add_client(html->bw, ClientKey, 0);
3280 a_Bw_add_url(html->bw, url);
3281 MSG("NumPendingStyleSheets=%d\n", html->bw->NumPendingStyleSheets);
3282 }
3283 }
3284 _MSG("\n");
3285}
3286
/*
 * Open handler for <link>. Outside HEAD the element is ignored (with a
 * BUG_MSG unless this is an HTML5 document and it carries "itemprop").
 * Inside HEAD, a link is queued with html->addCssUrl() when all of the
 * following hold:
 * - rel is "stylesheet" (ASCII case-insensitive);
 * - type is absent or "text/css";
 * - media is absent, contains "screen", or equals "all";
 * - href yields a URL.
 *
 * NOTE(review): source lines 3305 and 3319 are missing from this listing
 * (the embedded numbering skips them). They hold the statements that
 * belong to the "suspicious HTML email" and "Remote stylesheets
 * enabled?" comments; restore them from the upstream file.
 */
3295static void Html_tag_open_link(DilloHtml *html, const char *tag, int tagsize)
3296{
3297 DilloUrl *url;
3298 const char *attrbuf;
3299
3300 //char *tag_str = dStrndup(tag, tagsize);
3301 //MSG("Html_tag_open_link(): %s\n", tag_str);
3302 //dFree(tag_str);
3303
3304 /* When viewing suspicious HTML email, don't load LINK */
/* NOTE(review): line 3305 (the statement for the comment above) is
 * missing here. */
3306
3307 /* Ignore LINK outside HEAD */
3308 if (!(html->InFlags & IN_HEAD)) {
3309 if (!((html->DocType == DT_HTML && html->DocTypeVersion >= 5.0f) &&
3310 a_Html_get_attr(html, tag, tagsize, "itemprop"))) {
3311 /* With the HTML 5.1 draft spec, link with itemprop may appear
3312 * in the body.
3313 */
3314 BUG_MSG("This <link> element must be inside the HEAD section.");
3315 }
3316 return;
3317 }
3318 /* Remote stylesheets enabled? */
/* NOTE(review): line 3319 (the statement for the comment above) is
 * missing here. */
3320 /* CSS stylesheet link */
3321 if (!(attrbuf = a_Html_get_attr(html, tag, tagsize, "rel")) ||
3322 dStrAsciiCasecmp(attrbuf, "stylesheet"))
3323 return;
3324
3325 /* IMPLIED attributes? */
3326 if (((attrbuf = a_Html_get_attr(html, tag, tagsize, "type")) &&
3327 dStrAsciiCasecmp(attrbuf, "text/css")) ||
3328 ((attrbuf = a_Html_get_attr(html, tag, tagsize, "media")) &&
3329 !dStriAsciiStr(attrbuf, "screen") && dStrAsciiCasecmp(attrbuf, "all")))
3330 return;
3331
3332 if (!(attrbuf = a_Html_get_attr(html, tag, tagsize, "href")) ||
3333 !(url = a_Html_url_new(html, attrbuf, NULL, 0)))
3334 return;
3335
3336 _MSG(" Html_tag_open_link(): addCssUrl %s\n", URL_STR(url));
3337
/* The local URL is freed right after being handed to addCssUrl(). */
3338 html->addCssUrl(url);
3339 a_Url_free(url);
3340}
3341
/*
 * Open handler for <base>. Inside HEAD, an "href" attribute replaces
 * html->base_url. In HTML5 documents any URL built from the attribute
 * is accepted; in older documents the new URL must have a scheme,
 * otherwise a BUG_MSG is emitted and the URL is discarded. A <base>
 * outside HEAD is only reported.
 *
 * NOTE(review): source line 3360 is missing from this listing (the
 * embedded numbering skips it); it holds the head of the call whose
 * tail "BaseUrl, URL_FLAGS(html->base_url) & URL_SpamSafe);" is visible
 * below.
 */
3345static void Html_tag_open_base(DilloHtml *html, const char *tag, int tagsize)
3346{
3347 const char *attrbuf;
3348 DilloUrl *BaseUrl;
3349
3350 if (html->InFlags & IN_HEAD) {
3351 if ((attrbuf = a_Html_get_attr(html, tag, tagsize, "href"))) {
3352 bool_t html5 = html->DocType == DT_HTML &&
3353 html->DocTypeVersion >= 5.0f;
3354
/* The two document flavours call a_Html_url_new() with different
 * trailing arguments. */
3355 BaseUrl = html5 ? a_Html_url_new(html, attrbuf, NULL, 0) :
3356 a_Html_url_new(html, attrbuf, "", 1);
3357
3358 if (html5 || URL_SCHEME_(BaseUrl)) {
3359 /* Pass the URL_SpamSafe flag to the new base url */
/* NOTE(review): line 3360 (start of the statement below) is missing. */
3361 BaseUrl, URL_FLAGS(html->base_url) & URL_SpamSafe);
/* The old base URL is freed and BaseUrl becomes html->base_url. */
3362 a_Url_free(html->base_url);
3363 html->base_url = BaseUrl;
3364 } else {
3365 BUG_MSG("<base> URI is relative (it MUST be absolute).");
3366 a_Url_free(BaseUrl);
3367 }
3368 }
3369 } else {
3370 BUG_MSG("<base> not inside HEAD section.");
3371 }
3372}
3373
/*
 * Open handler shared by the many elements in the Tags table that need
 * no element-specific work.
 *
 * NOTE(review): source line 3376, the only body line, is missing from
 * this listing (the embedded numbering skips it); restore it from the
 * upstream file.
 */
3374static void Html_tag_open_default(DilloHtml *html,const char *tag,int tagsize)
3375{
/* NOTE(review): line 3376 is missing here. */
3377}
3378
3379/*
3380 * <SPAN>
3381 */
/*
 * Open handler for <span>. When prefs.show_tooltip is on and the
 * element has a "title" attribute, the title text is passed to a call
 * whose head is not visible here.
 *
 * NOTE(review): source lines 3386 and 3391 are missing from this listing
 * (the embedded numbering skips them); 3391 holds the head of the call
 * that ends in "attrbuf);". Restore both from the upstream file.
 */
3382static void Html_tag_open_span(DilloHtml *html, const char *tag, int tagsize)
3383{
3384 const char *attrbuf;
3385
/* NOTE(review): line 3386 is missing here. */
3387
3388 if (prefs.show_tooltip &&
3389 (attrbuf = a_Html_get_attr(html, tag, tagsize, "title"))) {
3390
/* NOTE(review): line 3391 (start of the statement below) is missing. */
3392 attrbuf);
3393 }
3394}
3395
3396/*
3397 * html5 sectioning stuff: article aside nav section header footer
3398 */
/*
 * Open handler for the sectioning elements (it is also called by the
 * <div> handler). When prefs.show_tooltip is on and the element has a
 * "title" attribute, the title text is passed to a call whose head is
 * not visible here.
 *
 * NOTE(review): source line 3407 is missing from this listing (the
 * embedded numbering skips it); it holds the head of the call that ends
 * in "attrbuf);". Restore it from the upstream file.
 */
3399static void Html_tag_open_sectioning(DilloHtml *html, const char *tag,
3400 int tagsize)
3401{
3402 const char *attrbuf;
3403
3404 if (prefs.show_tooltip &&
3405 (attrbuf = a_Html_get_attr(html, tag, tagsize, "title"))) {
3406
/* NOTE(review): line 3407 (start of the statement below) is missing. */
3408 attrbuf);
3409 }
3410}
3411
3412/*
3413 * <DIV> (TODO: make a complete implementation)
3414 */
3415static void Html_tag_open_div(DilloHtml *html, const char *tag, int tagsize)
3416{
3417 a_Html_tag_set_align_attr (html, tag, tagsize);
3418 Html_tag_open_sectioning(html, tag, tagsize);
3419}
3420
3425{
/*
 * NOTE(review): the signature of this function is missing from the
 * listing (the embedded numbering jumps from 3419 to 3425). Presumably
 * this is Html_tag_close_par, which the Tags table uses as close
 * handler for address, dir, dl, dt and menu -- confirm against the
 * upstream file.
 *
 * The body adds a paragraph break in the current word style.
 */
3426 HT2TB(html)->addParbreak (9, html->wordStyle ());
3427}
3428
3429/*
3430 * <WBR> "The wbr element represents a line break opportunity."
3431 */
3432static void Html_tag_content_wbr(DilloHtml *html, const char *tag, int tagsize)
3433{
3434 HT2TB(html)->addBreakOption(html->wordStyle (), true);
3435}
3436
3437
/*
 * Parsing table: one TagInfo entry per supported element. Each entry
 * lists the element name, a B8() flag pattern, an end-tag kind character
 * and three handlers (open, content, close), with NULL where there is
 * none.
 *
 * - Keep the entries sorted by name: a_Html_tag_index() finds a tag with
 *   a binary search over this array.
 * - Flag masks, as tested by the stack cleanup code in this file: 1
 *   marks an inline element, 2 an inline container and 4 a block
 *   container.
 * - End-tag kind 'O' marks an optional close. 'R' and 'F' presumably
 *   mean required and forbidden -- TODO confirm against the TagInfo
 *   declaration.
 *
 * NOTE(review): source lines 3531, 3542 and 3565 are missing from this
 * listing (the embedded numbering skips them). Each held the close
 * handler and closing brace of the entry that is cut off just before it
 * ("map", "optgroup" and "textarea"). Restore them from the upstream
 * file before building.
 */
3465static const TagInfo Tags[] = {
3466 {"a", B8(01011),'R', Html_tag_open_a, NULL, Html_tag_close_a},
3467 {"abbr", B8(01011),'R', Html_tag_open_abbr, NULL, NULL},
3468 /* acronym 010101 -- obsolete in HTML5 */
3469 {"address", B8(01110),'R', Html_tag_open_default, NULL, Html_tag_close_par},
3470 {"area", B8(01001),'F', Html_tag_open_default, Html_tag_content_area, NULL},
3471 {"article", B8(01110),'R', Html_tag_open_sectioning, NULL, NULL},
3472 {"aside", B8(01110),'R', Html_tag_open_sectioning, NULL, NULL},
3473 {"audio", B8(01111),'R', Html_tag_open_audio, NULL, Html_tag_close_media},
3474 {"b", B8(01011),'R', Html_tag_open_default, NULL, NULL},
3475 {"base", B8(10001),'F', Html_tag_open_base, NULL, NULL},
3476 /* basefont 010001 -- obsolete in HTML5 */
3477 /* bdo 010101 */
3478 {"big", B8(01011),'R', Html_tag_open_default, NULL, NULL},
3479 {"blockquote", B8(01110),'R', Html_tag_open_blockquote, NULL, NULL},
3480 {"body", B8(01110),'O', Html_tag_open_body, NULL, Html_tag_close_body},
3481 {"br", B8(01001),'F', Html_tag_open_default, Html_tag_content_br, NULL},
3482 {"button", B8(01111),'R', Html_tag_open_button,NULL,Html_tag_close_button},
3483 /* caption */
3484 {"center", B8(01110),'R', Html_tag_open_default, NULL, NULL},
3485 {"cite", B8(01011),'R', Html_tag_open_default, NULL, NULL},
3486 {"code", B8(01011),'R', Html_tag_open_default, NULL, NULL},
3487 /* col 010010 'F' */
3488 /* colgroup */
3489 {"dd", B8(01110),'O', Html_tag_open_dd, NULL, NULL},
3490 {"del", B8(01111),'R', Html_tag_open_default, NULL, NULL},
3491 {"dfn", B8(01011),'R', Html_tag_open_default, NULL, NULL},
3492 {"dir", B8(01100),'R', Html_tag_open_dir, NULL, Html_tag_close_par},
3493 /* TODO: complete <div> support! */
3494 {"div", B8(01110),'R', Html_tag_open_div, NULL, NULL},
3495 {"dl", B8(01100),'R', Html_tag_open_dl, NULL, Html_tag_close_par},
3496 {"dt", B8(01010),'O', Html_tag_open_dt, NULL, Html_tag_close_par},
3497 {"em", B8(01011),'R', Html_tag_open_default, NULL, NULL},
3498 {"embed", B8(01001),'F', Html_tag_open_embed, Html_tag_content_embed, NULL},
3499 /* fieldset */
3500 {"figcaption", B8(01110),'R', Html_tag_open_default, NULL, NULL},
3501 {"figure", B8(01110),'R', Html_tag_open_default, NULL, NULL},
3502 {"font", B8(01011),'R', Html_tag_open_font, NULL, NULL},
3503 {"footer", B8(01110),'R', Html_tag_open_sectioning, NULL, NULL},
3504 {"form", B8(01110),'R', Html_tag_open_form, NULL, Html_tag_close_form},
3505 {"frame", B8(01000),'F', Html_tag_open_frame, Html_tag_content_frame, NULL},
3506 {"frameset", B8(01110),'R', Html_tag_open_default, Html_tag_content_frameset,
3507 NULL},
3508 {"h1", B8(01010),'R', Html_tag_open_h, NULL, NULL},
3509 {"h2", B8(01010),'R', Html_tag_open_h, NULL, NULL},
3510 {"h3", B8(01010),'R', Html_tag_open_h, NULL, NULL},
3511 {"h4", B8(01010),'R', Html_tag_open_h, NULL, NULL},
3512 {"h5", B8(01010),'R', Html_tag_open_h, NULL, NULL},
3513 {"h6", B8(01010),'R', Html_tag_open_h, NULL, NULL},
3514 {"head", B8(10111),'O', Html_tag_open_head, NULL, Html_tag_close_head},
3515 {"header", B8(01110),'R', Html_tag_open_sectioning, NULL, NULL},
3516 {"hr", B8(01000),'F', Html_tag_open_hr, Html_tag_content_hr, NULL},
3517 {"html", B8(00110),'O', Html_tag_open_html, NULL, Html_tag_close_html},
3518 {"i", B8(01011),'R', Html_tag_open_default, NULL, NULL},
3519 {"iframe", B8(01111),'R', Html_tag_open_frame, Html_tag_content_frame, NULL},
3520 {"img", B8(01001),'F', Html_tag_open_img, Html_tag_content_img, NULL},
3521 {"input", B8(01001),'F', Html_tag_open_input, NULL, NULL},
3522 {"ins", B8(01111),'R', Html_tag_open_default, NULL, NULL},
3523 {"isindex", B8(11001),'F', Html_tag_open_isindex, NULL, NULL},
3524 {"kbd", B8(01011),'R', Html_tag_open_default, NULL, NULL},
3525 /* label 010101 */
3526 /* legend 01?? */
3527 {"li", B8(01110),'O', Html_tag_open_li, NULL, Html_tag_close_li},
3528 {"link", B8(10001),'F', Html_tag_open_link, NULL, NULL},
3529 {"main", B8(01110),'R', Html_tag_open_sectioning, NULL, NULL},
3530 {"map", B8(01101),'R', Html_tag_open_default, Html_tag_content_map,
/* NOTE(review): line 3531 (end of the "map" entry) is missing here. */
3532 {"mark", B8(01011),'R', Html_tag_open_default, NULL, NULL},
3533 /* menu 1010 -- TODO: not exactly 1010, it can contain LI and inline */
3534 {"menu", B8(01100),'R', Html_tag_open_menu, NULL, Html_tag_close_par},
3535 {"meta", B8(11001),'F', Html_tag_open_meta, NULL, NULL},
3536 {"nav", B8(01110),'R', Html_tag_open_sectioning, NULL, NULL},
3537 /* noframes 1011 -- obsolete in HTML5 */
3538 /* noscript 1011 */
3539 {"object", B8(11111),'R', Html_tag_open_object, Html_tag_content_object,NULL},
3540 {"ol", B8(01100),'R', Html_tag_open_ol, NULL, NULL},
3541 {"optgroup", B8(01011),'O', Html_tag_open_optgroup, NULL,
/* NOTE(review): line 3542 (end of the "optgroup" entry) is missing. */
3543 {"option", B8(01001),'O', Html_tag_open_option, NULL, Html_tag_close_option},
3544 {"p", B8(01010),'O', Html_tag_open_p, NULL, NULL},
3545 /* param 010001 'F' */
3546 {"pre", B8(01010),'R', Html_tag_open_pre, NULL, Html_tag_close_pre},
3547 {"q", B8(01011),'R', Html_tag_open_q, NULL, Html_tag_close_q},
3548 {"s", B8(01011),'R', Html_tag_open_default, NULL, NULL},
3549 {"samp", B8(01011),'R', Html_tag_open_default, NULL, NULL},
3550 {"script", B8(11101),'R', Html_tag_open_script,NULL,Html_tag_close_script},
3551 {"section", B8(01110),'R', Html_tag_open_sectioning, NULL, NULL},
3552 {"select", B8(01011),'R', Html_tag_open_select,NULL,Html_tag_close_select},
3553 {"small", B8(01011),'R', Html_tag_open_default, NULL, NULL},
3554 {"source", B8(01001),'F', Html_tag_open_source, Html_tag_content_source,NULL},
3555 {"span", B8(01011),'R', Html_tag_open_span, NULL, NULL},
3556 {"strike", B8(01011),'R', Html_tag_open_default, NULL, NULL},
3557 {"strong", B8(01011),'R', Html_tag_open_default, NULL, NULL},
3558 {"style", B8(10011),'R', Html_tag_open_style, NULL, Html_tag_close_style},
3559 {"sub", B8(01011),'R', Html_tag_open_default, NULL, NULL},
3560 {"sup", B8(01011),'R', Html_tag_open_default, NULL, NULL},
3561 {"table", B8(01100),'R', Html_tag_open_table, Html_tag_content_table, NULL},
3562 {"tbody", B8(01010),'O', Html_tag_open_tbody, Html_tag_content_tbody, NULL},
3563 {"td", B8(01110),'O', Html_tag_open_td, Html_tag_content_td, NULL},
3564 {"textarea", B8(01011),'R', Html_tag_open_textarea, Html_tag_content_textarea,
/* NOTE(review): line 3565 (end of the "textarea" entry) is missing. */
3566 {"tfoot", B8(01010),'O', Html_tag_open_tfoot, NULL, NULL},
3567 {"th", B8(01110),'O', Html_tag_open_th, Html_tag_content_th, NULL},
3568 {"thead", B8(01010),'O', Html_tag_open_thead, NULL, NULL},
3569 {"title", B8(10011),'R', Html_tag_open_title, NULL, Html_tag_close_title},
3570 {"tr", B8(01100),'O', Html_tag_open_tr, Html_tag_content_tr, NULL},
3571 {"tt", B8(01011),'R', Html_tag_open_default, NULL, NULL},
3572 {"u", B8(01011),'R', Html_tag_open_default, NULL, NULL},
3573 {"ul", B8(01100),'R', Html_tag_open_ul, NULL, NULL},
3574 {"var", B8(01011),'R', Html_tag_open_default, NULL, NULL},
3575 {"video", B8(01111),'R', Html_tag_open_video, NULL, Html_tag_close_media},
3576 {"wbr", B8(01011),'F', Html_tag_open_default, Html_tag_content_wbr, NULL}
3577};
/* Number of entries in Tags; a_Html_tag_index() uses it as the upper
 * bound of its search. */
3578#define NTAGS (sizeof(Tags)/sizeof(Tags[0]))
3579
3580/* Only available in C++11 and up */
/* Compile-time check that the table and HTML_NTAGS (defined outside
 * this chunk) stay in sync. */
3581#if __cpp_static_assert
3582static_assert(NTAGS == HTML_NTAGS,
3583 "Mismatch between number of tags in Tags and HTML_NTAGS");
3584#endif
3585
3586/*
3587 * Compares tag from buffer ('/' or '>' or space-ended string) [p1]
3588 * with tag from taglist (lowercase, zero ended string) [p2]
3589 * Return value: as strcmp()
3590 */
3591static int Html_tag_compare(const char *p1, const char *p2)
3592{
3593 while ( *p2 ) {
3594 if (D_ASCII_TOLOWER(*p1) != *p2)
3595 return(D_ASCII_TOLOWER(*p1) - *p2);
3596 ++p1;
3597 ++p2;
3598 }
3599 return !strchr(" >/\n\r\t", *p1);
3600}
3601
3606int a_Html_tag_index(const char *tag)
3607{
3608 int low, high, mid, cond;
3609
3610 /* Binary search */
3611 low = 0;
3612 high = NTAGS - 1; /* Last tag index */
3613 while (low <= high) {
3614 mid = (low + high) / 2;
3615 if ((cond = Html_tag_compare(tag, Tags[mid].name)) < 0 )
3616 high = mid - 1;
3617 else if (cond > 0)
3618 low = mid + 1;
3619 else
3620 return mid;
3621 }
3622 return -1;
3623}
3624
3633static int Html_triggers_optional_close(int old_idx, int cur_idx)
3634{
3635 int Flags = Tags[cur_idx].Flags;
3636 if (old_idx == i_P || old_idx == i_DT) {
3637 /* P and DT are closed by block elements (i.e. non inline)*/
3638 return (!(Flags & 1));
3639 } else if (old_idx == i_LI) {
3640 /* LI closes LI
3641 * Note: non-flow should also close it, but FF does not. */
3642 return (cur_idx == i_LI);
3643 } else if (old_idx == i_TD || old_idx == i_TH) {
3644 /* TD and TH are closed by: TD, TH and TR.
3645 * Note: non-flow should also close it, but FF does not. */
3646 return (cur_idx == i_TD || cur_idx == i_TH || cur_idx == i_TR);
3647 } else if (old_idx == i_TR) {
3648 /* TR closes TR */
3649 return (cur_idx == i_TR);
3650 } else if (old_idx == i_DD) {
3651 /* DD is closed by DD and DT */
3652 return (cur_idx == i_DD || cur_idx == i_DT);
3653 } else if (old_idx == i_OPTGROUP) {
3654 /* i_OPTGROUP can only contain OPTION */
3655 return (cur_idx != i_OPTION);
3656 } else if (old_idx == i_OPTION) {
3657 return 1; // OPTION always needs close
3658 }
3659
3660 /* Don't close HTML, HEAD and BODY. They're handled by Html_test_section().
3661 * TODO: TBODY is pending */
3662 return 0;
3663}
3664
3672static inline int Html_forbids_cross_nesting(const int InFlags,
3673 const int new_idx)
3674{
3675 int f = InFlags, ni = new_idx, oi = -1;
3676 if (f & (IN_A | IN_BUTTON | IN_SELECT | IN_TEXTAREA) &&
3677 (ni == i_A || ni == i_BUTTON || ni == i_SELECT || ni == i_TEXTAREA))
3678 oi = (f & IN_A ? i_A : f & IN_BUTTON ? i_BUTTON : f & IN_SELECT ?
3679 i_SELECT : f & IN_TEXTAREA ? i_TEXTAREA : 0);
3680 return oi;
3681}
3682
3691static void Html_tag_cleanup_to_idx(DilloHtml *html, int s_idx,
3692 int new_idx, int fi, char op)
3693{
3694 int s_top, ni = new_idx;
3695 while ((s_top = html->stack->size() - 1) >= s_idx) {
3696 int toptag_idx = S_TOP(html)->tag_idx;
3697 TagInfo toptag = Tags[toptag_idx];
3698
3699 if (fi >= 0) {
3700 // forbidden nesting
3701 if (toptag_idx != fi)
3702 BUG_MSG(" Nesting cleanup - forcing close of open tag: <%s>.",
3703 toptag.name);
3704 } else if (s_top == s_idx && op == 'c') {
3705 // target tag, no bug when closing.
3706 } else if (toptag.EndTag == 'O') {
3707 // optional close, that's OK
3708 } else if ((!(toptag.Flags & 4) &&
3709 (Tags[ni].Flags & 4 || !(Tags[ni].Flags & 1))) ||
3710 (Tags[ni].Flags & 1 && !(toptag.Flags & 2))) {
3711 // block {element, container} in non block container or
3712 // inline element in non inline container
3713 BUG_MSG((op == 'o') ?
3714 "Bad nesting: <%s> can't contain <%s>. -- closing <%s>." :
3715 "<%s> needs to be closed before </%s>. -- closing <%s>.",
3716 toptag.name, Tags[ni].name, toptag.name);
3717 } else {
3718 BUG_MSG(
3719 "<%s> should have been closed before </%s>. -- closing <%s>.",
3720 toptag.name, Tags[ni].name, toptag.name);
3721 }
3722 _MSG("op(%c): %s s_top=%d s_idx=%d\n", op, toptag.name, s_top, s_idx);
3723 if (toptag_idx == i_BODY &&
3724 !((html->InFlags & IN_EOF) || html->ReqTagClose)) {
3725 (s_idx == 1 ? html->PrevWasHtmlClose : html->PrevWasBodyClose) = true;
3726 break; // only pop {BODY,HTML} upon EOF or redundancy
3727 }
3728 if (toptag.close)
3729 toptag.close(html);
3730 Html_real_pop_tag(html);
3731 }
3732}
3733
3743static void Html_stack_cleanup_at_open(DilloHtml *html, int ni)
3744{
3745 if (!html->TagSoup)
3746 return;
3747
3748 int s_top = html->stack->size() - 1, s_idx;
3749 int fi = Html_forbids_cross_nesting(html->InFlags, ni);
3750 for (s_idx = s_top; s_idx > 0; --s_idx) {
3751 int ti = html->stack->getRef(s_idx)->tag_idx;
3752
3753 if (fi >= 0) {
3754 // forbidden cross nesting found
3755 if (ti != fi)
3756 continue; // don't allow, close
3757 --s_idx;
3758 BUG_MSG("Forbidden nesting: <%s> can't contain <%s>. -- closing "
3759 "<%s>.", Tags[fi].name, Tags[ni].name, Tags[fi].name);
3760
3761 } else if ((html->InFlags & IN_PRE) && ni == i_HR) {
3762 break; // allow Apache's bad HTML directory listings...
3763
3764 } else if (Tags[ti].EndTag == 'O') { // Element with optional close
3765 if (Html_triggers_optional_close(ti, ni))
3766 continue; // close
3767 } else if (!(Tags[ni].Flags & 1) && !(Tags[ti].Flags & 4)) {
3768 // Block element over a NON block container
3769 if (ti == i_A && html->DocTypeVersion >= 5.0f)
3770 break;
3771 continue; // close
3772 }
3773
3774 break;
3775 }
3776
3777 if (s_idx < s_top)
3778 Html_tag_cleanup_to_idx(html, s_idx + 1, ni, fi, 'o');
3779}
3780
3793static void Html_tag_cleanup_at_close(DilloHtml *html, int new_idx)
3794{
3795 int stack_idx, tag_idx, matched = 0, expected = 0;
3796 TagInfo new_tag = Tags[new_idx];
3797
3798 /* Look for the candidate tag to close */
3799 stack_idx = html->stack->size();
3800 while (--stack_idx) {
3801 tag_idx = html->stack->getRef(stack_idx)->tag_idx;
3802 if (tag_idx == new_idx) {
3803 /* matching tag found */
3804 matched = 1;
3805 break;
3806 } else if (Tags[tag_idx].EndTag == 'O') {
3807 /* close elements with optional close */
3808 continue;
3809 } else if ((new_idx == i_A && html->InFlags & IN_A) ||
3810 (new_idx == i_BUTTON && html->InFlags & IN_BUTTON) ||
3811 (new_idx == i_SELECT && html->InFlags & IN_SELECT) ||
3812 (new_idx == i_TEXTAREA && html->InFlags & IN_TEXTAREA)) {
3813 /* Let these elements close anything left open inside them */
3814 continue;
3815 } else if (Tags[new_idx].Flags & 4 && // Block container
3816 Tags[stack_idx].Flags & 3) { // Inline element or container
3817 /* Let a block container close inline elements left open inside it. */
3818 continue;
3819 } else {
3820 /* this is the tag that should have been closed */
3821 expected = 1;
3822 break;
3823 }
3824 }
3825
3826 if (matched) {
3827 Html_tag_cleanup_to_idx(html, stack_idx, new_idx, -1, 'c');
3828 } else if (expected) {
3829 BUG_MSG("Unexpected closing tag: </%s> -- expected </%s>.",
3830 new_tag.name, Tags[tag_idx].name);
3831 } else {
3832 BUG_MSG("Unexpected closing tag: </%s>.", new_tag.name);
3833 }
3834}
3835
3840static void Html_test_section(DilloHtml *html, int new_idx, int IsCloseTag)
3841{
3842 const char *tag;
3843 int tag_idx;
3844
3845 if (!(html->InFlags & IN_HTML) && html->DocType == DT_NONE)
3846 BUG_MSG("The required DOCTYPE declaration is missing. "
3847 "Handling as HTML4.");
3848
3849 if (!(html->InFlags & IN_HTML)) {
3850 tag = "<html>";
3851 tag_idx = a_Html_tag_index(tag + 1);
3852 if (tag_idx != new_idx || IsCloseTag) {
3853 /* implicit open */
3854 Html_force_push_tag(html, tag_idx);
3855 _MSG("Open : %*s%s\n", html->stack->size()," ",Tags[tag_idx].name);
3856 Tags[tag_idx].open (html, tag, strlen(tag));
3857 }
3858 }
3859
3860 if (Tags[new_idx].Flags & 16) {
3861 /* head element */
3862 if (!(html->InFlags & IN_HEAD) && html->Num_HEAD == 0) {
3863 tag = "<head>";
3864 tag_idx = a_Html_tag_index(tag + 1);
3865 if (tag_idx != new_idx || IsCloseTag) {
3866 /* implicit open of the head element */
3867 Html_force_push_tag(html, tag_idx);
3868 _MSG("Open : %*s%s\n", html->stack->size()," ",Tags[tag_idx].name);
3869 Tags[tag_idx].open (html, tag, strlen(tag));
3870 }
3871 }
3872
3873 } else if (Tags[new_idx].Flags & 8) {
3874 /* body element */
3875 if (html->InFlags & IN_HEAD) {
3876 tag = "</head>";
3877 tag_idx = a_Html_tag_index(tag + 2);
3878 Html_tag_cleanup_at_close(html, tag_idx);
3879 }
3880 tag = "<body>";
3881 tag_idx = a_Html_tag_index(tag + 1);
3882 if (tag_idx != new_idx || IsCloseTag) {
3883 /* implicit open */
3884 Html_force_push_tag(html, tag_idx);
3885 _MSG("Open : %*s%s\n", html->stack->size()," ",Tags[tag_idx].name);
3886 Tags[tag_idx].open (html, tag, strlen(tag));
3887 }
3888 }
3889}
3890
/**
 * Parse the attributes that can appear on any tag and pass them on to the
 * style engine: "id", "class", "style" and the language
 * ("xml:lang" / "lang").
 *
 * Each lookup is guarded by a minimum 'tagsize', so that tags too short to
 * hold the attribute are not searched at all. "class" and "style" are only
 * read when prefs.parse_embedded_css or prefs.load_stylesheets is set.
 */
3894static void Html_parse_common_attrs(DilloHtml *html, char *tag, int tagsize)
3895{
3896 const char *attrbuf;
3897 char lang[3];
3898
3899 if (tagsize >= 8 && /* length of "<t id=i>" */
3900 (attrbuf = a_Html_get_attr(html, tag, tagsize, "id"))) {
3901 /* According to the SGML declaration of HTML 4, all NAME values
3902 * occurring outside entities must be converted to uppercase
3903 * (this is what "NAMECASE GENERAL YES" says). But the HTML 4
3904 * spec states in Sec. 7.5.2 that anchor ids are case-sensitive.
3905 * So we don't do it and hope for better specs in the future ...
3906 */
3907 Html_check_name_val(html, attrbuf, "id");
3908
3909 html->styleEngine->setId(attrbuf);
3910 }
3911
3912 if (tagsize >= 11 && (prefs.parse_embedded_css || prefs.load_stylesheets)) {
3913 /* length of "<t class=i>" or "<t style=i>" */
3914 attrbuf = a_Html_get_attr(html, tag, tagsize, "class");
3915 if (attrbuf)
3916 html->styleEngine->setClass (attrbuf);
3917
3918 attrbuf = a_Html_get_attr(html, tag, tagsize, "style");
3919 if (attrbuf)
3920 html->styleEngine->setStyle (attrbuf);
3921 }
3922
3923 /* handle "xml:lang" and "lang" attributes
3924 * We use only the first two chars of the value to deal with
3925 * extended language tags (see http://www.rfc-editor.org/rfc/bcp/bcp47.txt)
3926 */
/* 'lang' is zero-filled here and at most two chars are copied into it
 * below, so the third byte always stays as the terminating NUL. */
3927 memset(lang, 0, sizeof(lang));
3928 if (tagsize >= 14) {
3929 /* length of "<t xml:lang=i>" */
3930 attrbuf = a_Html_get_attr(html, tag, tagsize, "xml:lang");
3931 if (attrbuf)
3932 strncpy(lang, attrbuf, 2);
3933 }
3934 if (!lang[0] && tagsize >= 10) { /* 'xml:lang' prevails over 'lang' */
3935 /* length of "<t lang=i>" */
3936 attrbuf = a_Html_get_attr(html, tag, tagsize, "lang");
3937 if (attrbuf)
3938 strncpy(lang, attrbuf, 2);
3939 }
3940 if (lang[0])
/* NOTE(review): source line 3941, the statement controlled by the 'if'
 * above, is missing from this listing. Presumably it hands 'lang' to the
 * style engine; restore it from the original html.cc. */
3942}
3943
3948static void Html_check_html5_obsolete(DilloHtml *html, int ni)
3949{
3950 static int indexes[9] = {-1};
3951
3952 if (indexes[0] == -1) {
3953 indexes[0] = a_Html_tag_index("dir");
3954 indexes[1] = a_Html_tag_index("frame");
3955 indexes[2] = a_Html_tag_index("frameset");
3956 indexes[3] = a_Html_tag_index("isindex");
3957 indexes[4] = a_Html_tag_index("strike");
3958 indexes[5] = a_Html_tag_index("big");
3959 indexes[6] = a_Html_tag_index("center");
3960 indexes[7] = a_Html_tag_index("font");
3961 indexes[8] = a_Html_tag_index("tt");
3962 }
3963 for (int i = 0; i < 9; i++) {
3964 if (indexes[i] == ni) {
3965 BUG_MSG("<%s> is obsolete in HTML5.", Tags[ni].name);
3966 break;
3967 }
3968 }
3969}
3970
/* NOTE(review): the function header (source line 3971) and the first line
 * of the call below (source line 3973) are missing from this listing; only
 * the last argument of that call is visible. Presumably this is the body of
 * Html_display_block(), which Html_process_tag() calls -- confirm against
 * the original html.cc and restore the missing lines. */
3972{
3974 false /* Perhaps true for widgets oof? */);
3975}
3976
/* NOTE(review): the function header (source line 3977) is missing from this
 * listing, so the name and parameters of this function cannot be read here
 * (the body uses a variable named 'html'). The body adds a textblock through
 * Html_add_textblock(html, false, 0, true). Restore the header from the
 * original html.cc. */
3978{
3979 Html_add_textblock(html, false, 0, true);
3980}
3981
/* NOTE(review): the function header (source line 3982) is missing from this
 * listing; the body uses a variable named 'html'. Source lines 4003 and 4005
 * (the conditions that select between the "none", "ordered" and "unordered"
 * branches) are missing as well. Restore them from the original html.cc.
 *
 * What the visible code does: it creates a new ListItem widget, adds it to
 * the current textblock between two paragraph breaks, makes it the current
 * textblock, and initializes its marker either with the item number as text
 * or with a Bullet widget. */
3983{
3984 Style *style = html->style ();
3985 Style *wordStyle = html->wordStyle ();
3986 Widget **ref_list_item;
3987 ListItem *list_item;
3988 int *list_number;
3989 char buf[16];
3990
3991 /* Get our parent tag's variables (used as state storage) */
3992 list_number = &html->stack->getRef(html->stack->size()-2)->list_number;
3993 ref_list_item = &html->stack->getRef(html->stack->size()-2)->ref_list_item;
3994
3995 HT2TB(html)->addParbreak (0, wordStyle);
3996
/* The previous item stored by the parent is passed to the new ListItem;
 * the parent's reference is then updated to point at the new one. */
3997 list_item = new ListItem ((ListItem*)*ref_list_item,prefs.limit_text_width);
3998 HT2TB(html)->addWidget (list_item, style);
3999 HT2TB(html)->addParbreak (0, wordStyle);
4000 *ref_list_item = list_item;
4001 S_TOP(html)->textblock = html->dw = list_item;
4002
/* NOTE(review): source line 4003 (opening 'if') is missing here. */
4004 // none
/* NOTE(review): source line 4005 ('else if' for ordered lists) is missing
 * here. */
4006 // ordered
/* The parent's counter is post-incremented, so every item gets the next
 * number, rendered according to style->listStyleType. */
4007 numtostr((*list_number)++, buf, 16, style->listStyleType);
4008 list_item->initWithText (buf, wordStyle);
4009 } else {
4010 // unordered
4011 list_item->initWithWidget (new Bullet(), wordStyle);
4012 }
4013}
4014
/**
 * Process one complete tag, open or close.
 *
 * 'tag' points at the leading '<' and 'tagsize' counts the bytes up to and
 * including the closing '>'. Unknown tags are ignored, except that a
 * "<!doctype" found before IN_HTML is set is passed to Html_parse_doctype().
 *
 * For an open tag: the stack is cleaned up, the tag is pushed, the common
 * attributes are parsed, the tag's open() handler runs, and the element is
 * set up according to its 'display' style. The code then falls through to
 * the close handling, which runs for </x>, for a pending ReqTagClose
 * (requested here for tags with EndTag == 'F') and for the XML forms <x />
 * and <x/>.
 *
 * NOTE(review): this listing is missing source lines 4060, 4085-4086 and
 * 4089 (marked below); restore them from the original html.cc.
 */
4020static void Html_process_tag(DilloHtml *html, char *tag, int tagsize)
4021{
4022 int ti, ni; /* stack tag index and new tag index */
4023 char *start = tag + 1; /* discard the '<' */
4024 int IsCloseTag = (*start == '/');
4025
4026 dReturn_if (html->stop_parser == true);
4027
/* IsCloseTag is 0 or 1, so this skips the '/' of a close tag */
4028 ni = a_Html_tag_index(start + IsCloseTag);
4029 if (ni == -1) {
4030 /* TODO: doctype parsing is a bit fuzzy, but enough for the time being */
4031 if (!(html->InFlags & IN_HTML)) {
4032 if (tagsize > 9 && !dStrnAsciiCasecmp(tag, "<!doctype", 9))
4033 Html_parse_doctype(html, tag, tagsize);
4034 }
4035 /* Ignore unknown tags */
4036 return;
4037 }
4038 _MSG("Html_process_tag: %s%s\n", IsCloseTag ? "/" : "", Tags[ni].name);
4039
4040 if (!IsCloseTag && html->DocType == DT_HTML && html->DocTypeVersion >= 5.0f)
4041 Html_check_html5_obsolete(html, ni);
4042
/* i: 1 = previous tag was </html>, 2 = previous tag was </body>.
 * </html> right after </body> is the only thing not reported. */
4043 int i = html->PrevWasHtmlClose ? 1 : html->PrevWasBodyClose ? 2 : 0;
4044 if (i == 1 || (i == 2 && ni != i_HTML))
4045 BUG_MSG("Content after </%s> tag.", i == 1 ? "html" : "body");
4046 html->PrevWasHtmlClose = html->PrevWasBodyClose = false;
4047
4048 /* Handle HTML, HEAD and BODY. Elements with optional open and close */
4049 if (!(html->InFlags & IN_BODY) /* && parsing HTML */)
4050 Html_test_section(html, ni, IsCloseTag);
4051
4052 /* Tag processing */
4053 ti = S_TOP(html)->tag_idx;
4054 switch (IsCloseTag) {
4055 case 0:
4056 /* Open function */
4057
4058 /* Cleanup before opening a new tag */
4059 if (ti != -1)
/* NOTE(review): source line 4060, the statement controlled by the 'if'
 * above, is missing from this listing. Presumably it is the call to
 * Html_stack_cleanup_at_open(html, ni). As printed, the 'if' would wrongly
 * govern the IN_PRE test that follows. */
4061
4062 /* TODO: this is only raising a warning, but allows the element.
4063 * Note: Apache uses IMG inside PRE. */
4064 if ((html->InFlags & IN_PRE) && Html_tag_pre_excludes(html, ni))
4065 BUG_MSG("<pre> is not allowed to contain <%s>.", Tags[ni].name);
4066
4067 /* Push the tag into the stack */
4068 Html_push_tag(html, ni);
4069
4070 html->startElement (ni);
4071 _MSG("Open : %*s%s\n", html->stack->size(), " ", Tags[ni].name);
4072
4073 /* Parse attributes that can appear on any tag */
4074 Html_parse_common_attrs(html, tag, tagsize);
4075
4076 /* Call the open function for this tag */
4077 _MSG("Html_process_tag Open : %s\n", Tags[ni].name);
4078 Tags[ni].open (html, tag, tagsize);
4079
4080 if (! S_TOP(html)->display_none) {
4081 switch (html->style ()->display) {
4082 case DISPLAY_BLOCK:
4083 Html_display_block(html);
4084 break;
/* NOTE(review): source lines 4085-4086 (another 'case' label and its
 * statement) are missing from this listing. */
4087 break;
4088 case DISPLAY_LIST_ITEM:
/* NOTE(review): source line 4089 (the statement for this case) is missing
 * from this listing. */
4090 break;
4091 case DISPLAY_NONE:
4092 S_TOP(html)->display_none = true;
4093 break;
4094 case DISPLAY_INLINE:
/* a floating inline element is set up like a block */
4095 if (html->style()->vloat != FLOAT_NONE)
4096 Html_display_block(html);
4097 break;
4098 default:
4099 break;
4100 }
4101
4102 if (Tags[ni].content && ! S_TOP(html)->display_none) {
4103 Tags[ni].content (html, tag, tagsize);
4104 }
4105 }
4106
4107 if (html->stop_parser)
4108 break;
4109
4110 if (S_TOP(html)->parse_mode == DILLO_HTML_PARSE_MODE_VERBATIM) {
4111 /* don't change anything */
4112 } else if (S_TOP(html)->parse_mode != DILLO_HTML_PARSE_MODE_PRE &&
4113 (html->style ()->whiteSpace == WHITE_SPACE_PRE ||
4114 html->style ()->whiteSpace == WHITE_SPACE_PRE_WRAP)) {
4115 S_TOP(html)->parse_mode = DILLO_HTML_PARSE_MODE_PRE;
4116 html->pre_column = 0;
4117 html->PreFirstChar = true;
4118 }
4119
4120 if (html->styleEngine->getId ())
4121 Html_add_anchor(html, html->styleEngine->getId ());
4122
4123 /* Request immediate close for elements with forbidden close tag. */
4124 /* TODO: XHTML always requires close tags. A simple implementation
4125 * of the commented clause below will make it work. */
4126 if (/* parsing HTML && */ Tags[ni].EndTag == 'F')
4127 html->ReqTagClose = true;
4128
4129 /* Don't break! Open tags may also close themselves */
4130
4131 default:
4132 /* Close function */
4133
4134 /* Test for </x>, ReqTagClose, <x /> and <x/> */
4135 if (*start == '/' || /* </x> */
4136 html->ReqTagClose || /* request */
4137 (tag[tagsize-2] == '/' && /* XML: */
4138 (strchr(" \"'", tag[tagsize-3]) || /* [ "']/> */
4139 (size_t)tagsize == strlen(Tags[ni].name) + 3))) { /* <x/> */
4140
4141 _MSG("Html_process_tag Close: %s\n", Tags[ni].name);
4142 Html_tag_cleanup_at_close(html, ni);
4143 /* This was a close tag */
4144 html->ReqTagClose = false;
4145 }
4146 }
4147}
4148
/**
 * Search 'tag' for the attribute 'attrname' and collect its value.
 *
 * The tag is walked from index 1 with a small state machine; the attribute
 * name is compared ASCII case-insensitively. The value is accumulated in
 * html->attr_data, which is emptied on entry, so the returned string is
 * overwritten by the next call that uses the same buffer.
 *
 * tag_parsing_flags:
 *   HTML_ParseEntities  decode character references inside the value
 *   HTML_LeftTrim       strip leading whitespace from the value
 *   HTML_RightTrim      strip trailing whitespace from the value
 *
 * Returns the buffer's string when the attribute name was found (it may be
 * empty if the attribute has no value), or NULL when it was not found or
 * 'attrname' is empty.
 */
4159static const char *Html_get_attr2(DilloHtml *html,
4160 const char *tag,
4161 int tagsize,
4162 const char *attrname,
4163 int tag_parsing_flags)
4164{
4165 int i, entsize, Found = 0, delimiter = 0, attr_pos = 0;
4166 Dstr *Buf = html->attr_data;
/* NOTE(review): source line 4167 is missing from this listing. 'state' is
 * used below but never declared in the visible code; presumably it is
 * declared here and starts as SEEK_ATTR_START -- confirm against the
 * original html.cc. */
4168
4169 dReturn_val_if_fail(*attrname, NULL);
4170
4171 dStr_truncate(Buf, 0);
4172
4173 for (i = 1; i < tagsize; ++i) {
4174 switch (state) {
4175 case SEEK_ATTR_START:
4176 if (isspace(tag[i]))
4177 state = SEEK_TOKEN_START;
4178 else if (tag[i] == '=')
4179 state = SEEK_VALUE_START;
4180 break;
4181
4182 case MATCH_ATTR_NAME:
/* The whole name matched and the tag shows a delimiter: found. The
 * delimiter is re-read (--i) in SEEK_TOKEN_START. */
4183 if (!attrname[attr_pos] &&
4184 (tag[i] == '=' || isspace(tag[i]) || tag[i] == '>')) {
4185 Found = 1;
4186 state = SEEK_TOKEN_START;
4187 --i;
4188 } else if (!tag[i]) {
4189 state = SEEK_ATTR_START; // NULL byte is not allowed
4190 } else {
4191 if (D_ASCII_TOLOWER(tag[i]) != D_ASCII_TOLOWER(attrname[attr_pos]))
4192 state = SEEK_ATTR_START;
4193 attr_pos++;
4194 }
4195 break;
4196
4197 case SEEK_TOKEN_START:
/* A non-space, non-'=' char after a found name means the attribute has no
 * value: finish. Otherwise start matching a new name at this char. */
4198 if (tag[i] == '=') {
4199 state = SEEK_VALUE_START;
4200 } else if (!isspace(tag[i])) {
4201 attr_pos = 0;
4202 state = (Found) ? FINISHED : MATCH_ATTR_NAME;
4203 --i;
4204 }
4205 break;
4206 case SEEK_VALUE_START:
/* delimiter is the quote char, or ' ' for an unquoted value; in the
 * unquoted case the current char belongs to the value, so step back. */
4207 if (!isspace(tag[i])) {
4208 delimiter = (tag[i] == '"' || tag[i] == '\'') ? tag[i] : ' ';
4209 i -= (delimiter == ' ');
4210 state = (Found) ? GET_VALUE : SKIP_VALUE;
4211 }
4212 break;
4213
4214 case SKIP_VALUE:
4215 if ((delimiter == ' ' && isspace(tag[i])) || tag[i] == delimiter)
4216 state = SEEK_TOKEN_START;
4217 break;
4218 case GET_VALUE:
4219 if ((delimiter == ' ' && (isspace(tag[i]) || tag[i] == '>')) ||
4220 tag[i] == delimiter) {
4221 state = FINISHED;
4222 } else if (tag[i] == '&' &&
4223 (tag_parsing_flags & HTML_ParseEntities)) {
4224 const char *entstr;
4225 const bool_t is_attr = TRUE;
4226
/* On success the decoded text is appended and the rest of the reference
 * is skipped; otherwise the '&' is copied literally. */
4227 if ((entstr = Html_parse_entity(html, tag+i, tagsize-i, &entsize,
4228 is_attr))) {
4229 dStr_append(Buf, entstr);
4230 i += entsize-1;
4231 } else {
4232 dStr_append_c(Buf, tag[i]);
4233 }
4234 } else if (tag[i] == '\r' || tag[i] == '\t') {
4235 dStr_append_c(Buf, ' ');
4236 } else if (tag[i] == '\n') {
4237 /* ignore */
4238 } else {
4239 dStr_append_c(Buf, tag[i]);
4240 }
4241 break;
4242
4243 case FINISHED:
/* force the loop to end */
4244 i = tagsize;
4245 break;
4246 }
4247 }
4248
4249 if (tag_parsing_flags & HTML_LeftTrim)
4250 while (isspace(Buf->str[0]))
4251 dStr_erase(Buf, 0, 1);
4252 if (tag_parsing_flags & HTML_RightTrim)
4253 while (Buf->len && isspace(Buf->str[Buf->len - 1]))
4254 dStr_truncate(Buf, Buf->len - 1);
4255
4256 return (Found) ? Buf->str : NULL;
4257}
4258
/**
 * Public wrapper around Html_get_attr2(): get the value of attribute
 * 'attrname' from 'tag'. The result is whatever Html_get_attr2() returns
 * for the parsing flags passed here.
 */
4262const char *a_Html_get_attr(DilloHtml *html,
4263 const char *tag,
4264 int tagsize,
4265 const char *attrname)
4266{
4267 return Html_get_attr2(html, tag, tagsize, attrname,
/* NOTE(review): source line 4268, the last argument of the call above (the
 * tag parsing flags), is missing from this listing. Presumably it combines
 * the HTML_LeftTrim, HTML_RightTrim and HTML_ParseEntities flags -- restore
 * it from the original html.cc. */
4269}
4270
/* NOTE(review): the first line of this function's header (source line 4276,
 * holding the return type, the name and the first parameter, which the body
 * uses as 'html') is missing from this listing. Restore it from the original
 * html.cc.
 *
 * The visible body looks up 'attrname' with a_Html_get_attr() and returns a
 * dStrdup() copy of the value, or of 'def' when the attribute is absent. */
4277 const char *tag,
4278 int tagsize,
4279 const char *attrname,
4280 const char *def)
4281{
4282 const char *attrbuf = a_Html_get_attr(html, tag, tagsize, attrname);
4283
4284 return attrbuf ? dStrdup(attrbuf) : dStrdup(def);
4285}
4286
4295static void Html_callback(int Op, CacheClient_t *Client)
4296{
4297 DilloHtml *html = (DilloHtml*)Client->CbData;
4298
4299 if (Op) { /* EOF */
4300 html->write((char*)Client->Buf, Client->BufSize, 1);
4301 html->finishParsing(Client->Key);
4302 } else {
4303 html->write((char*)Client->Buf, Client->BufSize, 0);
4304 }
4305}
4306
/**
 * Tokenize 'buf' and dispatch each complete token: whitespace runs go to
 * Html_process_space(), tags to Html_process_tag(), words to
 * Html_process_word(); comments are discarded. While the top of the stack
 * is in the mode tested at the start of the loop, text is skipped up to the
 * element's closing tag and copied into html->Stash.
 *
 * A token that is still incomplete at the end of the buffer is left
 * unprocessed, except that a trailing word is processed when 'Eof' is set.
 *
 * Returns the offset of the first byte that was not consumed.
 *
 * NOTE(review): the strcspn()/strncmp() scans below are not bounded by
 * 'bufsize'; this presumably relies on 'buf' being NUL-terminated at
 * buf[bufsize] -- confirm against the caller.
 * NOTE(review): source line 4324 is missing from this listing (marked
 * below).
 */
4311static int Html_write_raw(DilloHtml *html, char *buf, int bufsize, int Eof)
4312{
4313 char ch = 0, *p, *text;
4314 int token_start, buf_index;
4315
4316 /* Now, 'buf' and 'bufsize' define a buffer aligned to start at a token
4317 * boundary. Iterate through tokens until end of buffer is reached. */
4318 buf_index = 0;
4319 token_start = buf_index;
4320 while ((buf_index < bufsize) && !html->stop_parser) {
4321 /* invariant: buf_index == bufsize || token_start == buf_index */
4322
4323 if (S_TOP(html)->parse_mode ==
/* NOTE(review): source line 4324, the right-hand side of the comparison
 * above and the opening brace, is missing from this listing. Presumably it
 * compares against DILLO_HTML_PARSE_MODE_VERBATIM -- restore it from the
 * original html.cc. */
4325 /* Non HTML code here, let's skip until closing tag */
4326 do {
4327 const char *tag = Tags[S_TOP(html)->tag_idx].name;
4328 buf_index += strcspn(buf + buf_index, "<");
/* not enough bytes left to hold "</" + name + one more char: wait for
 * more data */
4329 if (buf_index + (int)strlen(tag) + 3 > bufsize) {
4330 buf_index = bufsize;
4331 } else if (strncmp(buf + buf_index, "</", 2) == 0 &&
4332 Html_match_tag(tag, buf+buf_index+2, strlen(tag)+1)) {
4333 /* copy VERBATIM text into the stash buffer */
4334 text = dStrndup(buf + token_start, buf_index - token_start);
4335 dStr_append(html->Stash, text);
4336 dFree(text);
4337 token_start = buf_index;
4338 break;
4339 } else
4340 ++buf_index;
4341 } while (buf_index < bufsize);
4342
4343 if (buf_index == bufsize)
4344 break;
4345 }
4346
4347 if (isspace(buf[buf_index])) {
4348 /* whitespace: group all available whitespace */
4349 while (++buf_index < bufsize && isspace(buf[buf_index])) ;
4350 Html_process_space(html, buf + token_start, buf_index - token_start);
4351 token_start = buf_index;
4352
4353 } else if (buf[buf_index] == '<' && (ch = buf[buf_index + 1]) &&
4354 (isalpha(ch) || strchr("/!?", ch)) ) {
4355 /* Tag */
4356 if (buf_index + 3 < bufsize && !strncmp(buf + buf_index, "<!--", 4)) {
4357 /* Comment: search for close of comment, skipping over
4358 * everything except a matching "-->" tag. */
4359 while ( (p = (char*) memchr(buf + buf_index, '>',
4360 bufsize - buf_index)) ){
4361 buf_index = p - buf + 1;
4362 if (p[-1] == '-' && p[-2] == '-') break;
4363 }
4364 if (p) {
4365 /* Got the whole comment. Let's throw it away! :) */
4366 token_start = buf_index;
4367 } else
4368 buf_index = bufsize;
4369 } else {
4370 /* Tag: search end of tag (skipping over quoted strings) */
4371 html->CurrOfs = html->Start_Ofs + token_start;
4372
4373 while ( buf_index < bufsize ) {
4374 buf_index++;
4375 buf_index += strcspn(buf + buf_index, ">\"'<");
4376 if ((ch = buf[buf_index]) == '>') {
4377 break;
4378 } else if (ch == '"' || ch == '\'') {
4379 /* Skip over quoted string */
4380 buf_index++;
4381 buf_index += strcspn(buf + buf_index,
4382 (ch == '"') ? "\">" : "'>");
4383 if (buf[buf_index] == '>') {
4384 /* Unterminated string value? Let's look ahead and test:
4385 * (<: unterminated, closing-quote: terminated) */
4386 int offset = buf_index + 1;
4387 offset += strcspn(buf + offset,
4388 (ch == '"') ? "\"<" : "'<");
4389 if (buf[offset] == ch || !buf[offset]) {
4390 buf_index = offset;
4391 } else {
4392 BUG_MSG("Attribute lacks closing quote.");
4393 break;
4394 }
4395 }
4396 } else if (ch == '<') {
4397 /* unterminated tag detected */
4398 p = dStrndup(buf+token_start+1,
4399 strcspn(buf+token_start+1, " <\n\r\t"));
4400 BUG_MSG("<%s> lacks its closing '>'.", p);
4401 dFree(p);
/* step back so that the tag passed on ends just before this '<' */
4402 --buf_index;
4403 break;
4404 }
4405 }
/* the tag is only processed if its end was found inside the buffer */
4406 if (buf_index < bufsize) {
4407 buf_index++;
4408 Html_process_tag(html, buf + token_start,
4409 buf_index - token_start);
4410 token_start = buf_index;
4411 }
4412 }
4413 } else {
4414 /* A Word: search for whitespace or tag open */
4415 html->CurrOfs = html->Start_Ofs + token_start;
4416
4417 while (++buf_index < bufsize) {
4418 buf_index += strcspn(buf + buf_index, " <\n\r\t\f\v");
/* a '<' that does not start a tag is part of the word */
4419 if (buf[buf_index] == '<' && (ch = buf[buf_index + 1]) &&
4420 !isalpha(ch) && !strchr("/!?", ch))
4421 continue;
4422 break;
4423 }
4424 if (buf_index < bufsize || Eof) {
4425 /* successfully found end of token */
/* the word is NUL-terminated in place for the call and the overwritten
 * byte is put back afterwards */
4426 ch = buf[buf_index];
4427 buf[buf_index] = 0;
4428 Html_process_word(html, buf + token_start,
4429 buf_index - token_start);
4430 buf[buf_index] = ch;
4431 token_start = buf_index;
4432 }
4433 }
4434 }/*while*/
4435
4436 HT2TB(html)->flush ();
4437
4438 return token_start;
4439}
4440
4441
#define B8(d)
Definition binaryconst.h:32
#define _MSG(...)
Definition bookmarks.c:45
#define MSG(...)
Definition bookmarks.c:46
bool_t a_Bw_expecting(BrowserWindow *bw)
Definition bw.c:334
void a_Bw_close_client(BrowserWindow *bw, int ClientKey)
Close a cache-client upon successful retrieval.
Definition bw.c:167
void a_Bw_add_url(BrowserWindow *bw, const DilloUrl *Url)
Add an URL to the browser window's list.
Definition bw.c:209
void a_Bw_add_client(BrowserWindow *bw, int Key, int Root)
Add a reference to a cache-client.
Definition bw.c:128
void a_Bw_remove_doc(BrowserWindow *bw, void *vdoc)
Remove a document from the bw's list.
Definition bw.c:263
void a_Bw_add_doc(BrowserWindow *bw, void *vdoc)
Add a document to the browser window's list.
Definition bw.c:221
void(* CA_Callback_t)(int Op, CacheClient_t *Client)
Callback type for cache clients.
Definition cache.h:43
int a_Capi_get_buf(const DilloUrl *Url, char **PBuf, int *BufSize)
Get the cache's buffer for the URL, and its size.
Definition capi.c:541
const char * a_Capi_set_content_type(const DilloUrl *url, const char *ctype, const char *from)
Set the Content-Type for the URL.
Definition capi.c:565
int a_Capi_dpi_verify_request(BrowserWindow *bw, DilloUrl *url)
Safety test: only allow GET|POST dpi-urls from dpi-generated pages.
Definition capi.c:232
void a_Capi_unref_buf(const DilloUrl *Url)
Unref the cache's buffer when no longer using it.
Definition capi.c:549
int a_Capi_open_url(DilloWeb *web, CA_Callback_t Call, void *CbData)
Most used function for requesting a URL.
Definition capi.c:392
int a_Capi_get_flags_with_redirection(const DilloUrl *Url)
Same as a_Capi_get_flags() but following redirections.
Definition capi.c:530
#define CAPI_IsCached
Definition capi.h:15
#define CAPI_Completed
Definition capi.h:19
A list of CssProperty objects.
Definition css.hh:328
bool enter(dw::core::Widget *widget, int link, int img, int x, int y)
Called, when a link is entered, left, or the position has changed.
Definition html.cc:739
bool press(dw::core::Widget *widget, int link, int img, int x, int y, dw::core::EventButton *event)
Handle the "press" signal.
Definition html.cc:759
bool click(dw::core::Widget *widget, int link, int img, int x, int y, dw::core::EventButton *event)
Handle the "click" signal.
Definition html.cc:793
bool ReqTagClose
Flag to close the stack's top tag.
void freeParseData()
Free parsing data.
Definition html.cc:626
int InFlags
tracks which elements we are in
DilloHtmlDocumentType DocType
DilloUrl * base_url
int getCurrLineNumber()
Return the line number of the tag/word being processed by the parser.
Definition html.cc:604
bool stop_parser
void bugMessage(const char *format,...)
Collect HTML error strings.
Definition html.cc:152
lout::misc::SimpleVector< DilloHtmlState > * stack
void write(char *Buf, int BufSize, int Eof)
Process the newly arrived html and put it into the page structure.
Definition html.cc:577
dw::core::style::Style * backgroundStyle()
char * Start_Buf
int formNew(DilloHtmlMethod method, const DilloUrl *action, DilloHtmlEnc enc, const char *charset)
Allocate and insert form information.
Definition html.cc:666
void finishParsing(int ClientKey)
Finish parsing a HTML page.
Definition html.cc:640
bool PreFirstChar
used to skip the first CR or CRLF in PRE tags
void startElement(int tag)
size_t CurrOfs
void addCssUrl(const DilloUrl *url)
Save URL in a vector (may be loaded later).
Definition html.cc:732
void initDw()
Miscellaneous initializations for Dw.
Definition html.cc:521
bool PrevWasCR
Flag to help parsing of "\r\n" in PRE tags.
float DocTypeVersion
Dstr * Stash
bool TagSoup
Flag to enable the parser's cleanup functions.
bool_t unloadedImages()
Definition html.cc:688
int pre_column
current column, used in PRE tags with tabs
DilloUrl * page_url
lout::misc::SimpleVector< DilloHtmlInput * > * inputs_outside_form
bool PrevWasHtmlClose
set when </html> is found
void restyle()
uchar_t Num_BODY
dw::core::style::Style * wordStyle()
bool StashSpace
BrowserWindow * bw
int32_t non_css_visited_color
as provided by vlink attribute in BODY
size_t OldLine
bool loadCssFromStash
current stash content should be loaded as CSS
lout::misc::SimpleVector< DilloHtmlForm * > * forms
dw::ImageMapsList maps
DilloHtml(BrowserWindow *bw, const DilloUrl *url, const char *content_type)
Create and initialize a new DilloHtml class.
Definition html.cc:431
~DilloHtml()
Free memory used by the DilloHtml class.
Definition html.cc:535
uchar_t Num_TITLE
dw::core::style::Style * style()
size_t OldOfs
uchar_t Num_HTML
element counters: used for validation purposes.
lout::misc::SimpleVector< DilloUrl * > * links
dw::core::Widget * dw
int32_t non_css_link_color
as provided by link attribute in BODY
char * content_type
Dstr * attr_data
Buffer for attribute value.
bool PrevWasBodyClose
set when </body> is found
lout::misc::SimpleVector< DilloUrl * > * cssUrls
int32_t visited_color
as computed according to CSS
uchar_t Num_HEAD
char * charset
bool InVisitedLink
used to 'contrast_visited_colors'
StyleEngine * styleEngine
lout::misc::SimpleVector< DilloHtmlImage * > * images
HtmlLinkReceiver linkReceiver
DilloHtmlForm * getCurrentForm()
Get the current form.
Definition html.cc:683
void loadImages(const DilloUrl *pattern)
Load images if they were disabled.
Definition html.cc:701
This class provides the glue between HTML parser and CSS subsystem.
void parse(DilloHtml *html, DilloUrl *url, const char *buf, int buflen, CssOrigin origin)
void setPseudoVisited()
set the CSS pseudo class :visited.
void setNonCssHint(CssPropertyName name, CssValueType type, CssPropertyValue value)
void setId(const char *id)
const char * getId()
dw::core::style::Color * backgroundColor()
void endElement(int tag)
tell the styleEngine that a html element has ended.
void setPseudoLink()
set the CSS pseudo class :link.
void setStyle(const char *style)
void setClass(const char *klass)
void inheritBackgroundColor()
Use of the background color of the parent style as default.
dw::core::style::StyleImage * backgroundImage(dw::core::style::BackgroundRepeat *bgRepeat, dw::core::style::BackgroundAttachment *bgAttachment, dw::core::style::Length *bgPositionX, dw::core::style::Length *bgPositionY)
Displays different kind of bullets.
Definition bullet.hh:15
void addShapeToCurrentMap(core::Shape *shape, int link)
Add a shape to the current map.
Definition image.cc:108
void startNewMap(lout::object::Object *key)
Start a new map and make it the current one.
Definition image.cc:96
void setCurrentMapDefaultLink(int link)
Set default link for current map.
Definition image.cc:116
Displays an instance of dw::core::Imgbuf.
Definition image.hh:120
void setIsMap()
Sets image as server side image map.
Definition image.cc:539
void setUseMap(ImageMapsList *list, Object *key)
Sets image as client side image map.
Definition image.cc:552
void forceMapRedraw()
Definition image.hh:179
void initWithText(const char *text, core::style::Style *style)
Definition listitem.cc:58
void initWithWidget(core::Widget *widget, core::style::Style *style)
Definition listitem.cc:48
Widget for drawing (horizontal) rules.
Definition ruler.hh:21
A Widget for rendering text blocks, i.e.
Definition textblock.hh:206
void addWidget(core::Widget *widget, core::style::Style *style)
Add a widget (word type) to the page.
void addSpace(core::style::Style *style)
?
void addText(const char *text, size_t len, core::style::Style *style)
Add a word to the page structure.
void addParbreak(int space, core::style::Style *style)
Cause a paragraph break.
static int CLASS_ID
Definition textblock.hh:867
dw::core::Shape implementation for simple circles.
Definition types.hh:91
Represents a button press or release event.
Definition events.hh:58
ButtonState state
Definition events.hh:42
dw::core::Shape implementation for polygons.
Definition types.hh:105
void addPoint(int x, int y)
Definition types.cc:156
dw::core::Shape implementation for simple rectangles.
Definition types.hh:70
Abstract interface for different shapes.
Definition types.hh:59
The base class of all dillo widgets.
Definition widget.hh:24
virtual void setStyle(style::Style *style)
Change the style of a widget.
Definition widget.cc:1233
Widget * getParent()
Definition widget.hh:552
Layout * getLayout()
Definition widget.hh:567
style::Color * getBgColor()
Get the actual background of a widget.
Definition widget.cc:1348
ListStyleType listStyleType
Definition style.hh:567
static Style * create(StyleAttrs *attrs)
Definition style.hh:628
Simple (simpler than container::untyped::Vector and container::typed::Vector) template based vector.
Definition misc.hh:94
void setSize(int newSize)
Set the size explicitly.
Definition misc.hh:167
void increase()
Increase the vector size by one.
Definition misc.hh:160
void set(int i, T t)
Store an object in the vector.
Definition misc.hh:246
T get(int i) const
Return the one element, explicitly.
Definition misc.hh:201
int size() const
Return the number of elements put into this vector.
Definition misc.hh:141
T * getRef(int i) const
Return the reference of one element.
Definition misc.hh:190
int32_t a_Color_parse(const char *str, int32_t default_color, int *err)
Parse a color string.
Definition colors.c:258
int32_t a_Color_vc(int32_t candidate, int32_t C_txt, int32_t C_lnk, int32_t C_bg)
Return a suitable "visited link" color.
Definition colors.c:344
@ CSS_PROPERTY_LIST_STYLE_TYPE
Definition css.hh:208
@ CSS_PROPERTY_MARGIN_BOTTOM
Definition css.hh:209
@ CSS_PROPERTY_BORDER_RIGHT_STYLE
Definition css.hh:176
@ CSS_PROPERTY_MARGIN_RIGHT
Definition css.hh:211
@ PROPERTY_X_LANG
Definition css.hh:247
@ CSS_PROPERTY_BORDER_RIGHT_WIDTH
Definition css.hh:177
@ CSS_PROPERTY_BORDER_LEFT_WIDTH
Definition css.hh:174
@ PROPERTY_X_LINK
Definition css.hh:246
@ PROPERTY_X_TOOLTIP
Definition css.hh:249
@ CSS_PROPERTY_MARGIN_LEFT
Definition css.hh:210
@ CSS_PROPERTY_HEIGHT
Definition css.hh:202
@ CSS_PROPERTY_WIDTH
Definition css.hh:240
@ PROPERTY_X_IMG
Definition css.hh:248
@ CSS_PROPERTY_BORDER_TOP_WIDTH
Definition css.hh:181
@ CSS_PROPERTY_MARGIN_TOP
Definition css.hh:212
@ CSS_PROPERTY_VERTICAL_ALIGN
Definition css.hh:237
@ CSS_PROPERTY_BORDER_LEFT_STYLE
Definition css.hh:173
@ CSS_PROPERTY_TEXT_ALIGN
Definition css.hh:230
@ CSS_PROPERTY_BORDER_TOP_STYLE
Definition css.hh:180
@ CSS_PROPERTY_BACKGROUND_COLOR
Definition css.hh:164
@ CSS_PROPERTY_COLOR
Definition css.hh:186
@ CSS_PROPERTY_BORDER_BOTTOM_STYLE
Definition css.hh:169
@ CSS_PROPERTY_BORDER_BOTTOM_WIDTH
Definition css.hh:170
@ CSS_PROPERTY_FONT_FAMILY
Definition css.hh:195
@ CSS_TYPE_ENUM
Value is i, if represented by enum_symbols[i].
Definition css.hh:39
@ CSS_TYPE_INTEGER
This type is only used internally, for x-* properties.
Definition css.hh:37
@ CSS_TYPE_LENGTH_PERCENTAGE
<length> or <percentage>.
Definition css.hh:43
@ CSS_TYPE_STRING
<string>
Definition css.hh:58
@ CSS_TYPE_SYMBOL
Symbols, which are directly copied (as opposed to CSS_TYPE_ENUM and CSS_TYPE_MULTI_ENUM).
Definition css.hh:59
@ CSS_TYPE_COLOR
Represented as integer.
Definition css.hh:55
int CssLength
Lengths are represented as int in the following way:
Definition css.hh:91
CssLengthType CSS_LENGTH_TYPE(CssLength l)
Definition css.hh:138
CssLength CSS_CREATE_LENGTH(float v, CssLengthType t)
Definition css.hh:106
@ CSS_LENGTH_TYPE_PX
Definition css.hh:95
@ CSS_LENGTH_TYPE_RELATIVE
This does not exist in CSS but is used in HTML.
Definition css.hh:101
@ CSS_LENGTH_TYPE_PERCENTAGE
Definition css.hh:100
@ CSS_LENGTH_TYPE_AUTO
This can be used as a simple value.
Definition css.hh:103
float CSS_LENGTH_VALUE(CssLength l)
Definition css.hh:142
@ CSS_ORIGIN_AUTHOR
Definition css.hh:33
unsigned int uint_t
Definition d_size.h:20
unsigned char bool_t
Definition d_size.h:21
char * a_Url_decode_hex_str(const char *str, size_t *p_sz)
Definition datauri.c:136
static const int bufsize
Definition decode.c:21
char * dStrconcat(const char *s1,...)
Concatenate a NULL-terminated list of strings.
Definition dlib.c:100
void dFree(void *mem)
Definition dlib.c:66
int dStrAsciiCasecmp(const char *s1, const char *s2)
Definition dlib.c:201
void dStr_sprintfa(Dstr *ds, const char *format,...)
Printf-like function that appends.
Definition dlib.c:462
void dStr_append(Dstr *ds, const char *s)
Append a C string to a Dstr.
Definition dlib.c:314
char * dStrdup(const char *s)
Definition dlib.c:75
Dstr * dStr_sized_new(int sz)
Create a new string with a given size.
Definition dlib.c:252
int dStrnAsciiCasecmp(const char *s1, const char *s2, size_t n)
Definition dlib.c:213
void dStr_erase(Dstr *ds, int pos_0, int len)
Erase a substring.
Definition dlib.c:386
void dStr_free(Dstr *ds, int all)
Free a dillo string.
Definition dlib.c:335
char * dStriAsciiStr(const char *haystack, const char *needle)
Case insensitive strstr.
Definition dlib.c:182
void dStr_append_l(Dstr *ds, const char *s, int l)
Append a C string to a Dstr (providing length).
Definition dlib.c:306
void dStr_append_c(Dstr *ds, int c)
Append one character.
Definition dlib.c:347
char * dStrndup(const char *s, size_t sz)
Definition dlib.c:86
void dStr_sprintf(Dstr *ds, const char *format,...)
Printf-like function.
Definition dlib.c:448
void dStr_vsprintfa(Dstr *ds, const char *format, va_list argp)
vsprintf-like function that appends.
Definition dlib.c:399
Dstr * dStr_new(const char *s)
Create a new string.
Definition dlib.c:323
void dStr_truncate(Dstr *ds, int len)
Truncate a Dstr to be 'len' bytes long.
Definition dlib.c:366
char * dStrnfill(size_t len, char c)
Return a new string of length 'len' filled with 'c' characters.
Definition dlib.c:147
#define dReturn_if_fail(expr)
Definition dlib.h:72
#define dReturn_val_if_fail(expr, val)
Definition dlib.h:76
#define dReturn_if(expr)
Definition dlib.h:64
#define dReturn_val_if(expr, val)
Definition dlib.h:68
#define D_ASCII_TOLOWER(c)
Definition dlib.h:37
#define TRUE
Definition dlib.h:23
#define FALSE
Definition dlib.h:19
#define dNew(type, count)
Definition dlib.h:49
@ SKIP_VALUE
Definition dpip.c:70
static Image * image
#define IMAGE_MAX_AREA
Definition fltkimgbuf.cc:27
void a_History_set_title_by_url(const DilloUrl *url, const char *title)
Set the page-title for a given URL.
Definition history.c:130
static void Html_tag_open_video(DilloHtml *html, const char *tag, int tagsize)
Definition html.cc:2507
static void Html_tag_open_title(DilloHtml *html, const char *tag, int tagsize)
Handle open TITLE.
Definition html.cc:1687
const char * a_Html_get_attr(DilloHtml *html, const char *tag, int tagsize, const char *attrname)
Call Html_get_attr2 telling it to parse entities and strip the result.
Definition html.cc:4262
static misc::SimpleVector< int > * Html_read_coords(DilloHtml *html, const char *str)
Read coords in a string, returning a vector of ints.
Definition html.cc:2362
DilloHtmlTagParsingFlags
Definition html.cc:86
@ HTML_RightTrim
Definition html.cc:88
@ HTML_LeftTrim
Definition html.cc:87
@ HTML_ParseEntities
Definition html.cc:89
static int i_HR
Definition html.cc:131
static void Html_tag_open_a(DilloHtml *html, const char *tag, int tagsize)
Definition html.cc:2690
static void Html_css_load_callback(int Op, CacheClient_t *Client)
Called by the network engine when a stylesheet has new data.
Definition html.cc:3229
static void Html_real_pop_tag(DilloHtml *html)
Pop the top tag in the stack.
Definition html.cc:1374
static void Html_tag_open_blockquote(DilloHtml *html, const char *tag, int tagsize)
Definition html.cc:2769
static int i_OPTGROUP
Definition html.cc:133
static void Html_push_tag(DilloHtml *html, int tag_idx)
Push the tag (copying attributes from the top of the stack)
Definition html.cc:1348
static void Html_tag_open_dir(DilloHtml *html, const char *tag, int tagsize)
Definition html.cc:2837
static void Html_tag_close_a(DilloHtml *html)
Definition html.cc:2760
bool a_Html_tag_set_valign_attr(DilloHtml *html, const char *tag, int tagsize)
Evaluates the VALIGN attribute (top|bottom|middle|baseline) and sets the style in style_attrs.
Definition html.cc:362
static const TagInfo Tags[]
Function index for the open, content, and close functions for each tag.
Definition html.cc:3465
static int Html_triggers_optional_close(int old_idx, int cur_idx)
For elements with optional close, check whether it is time to close, by also following Firefox's de facto...
Definition html.cc:3633
static void Html_tag_open_html(DilloHtml *html, const char *tag, int tagsize)
Handle open HTML element.
Definition html.cc:1605
static bool Html_must_add_breaks(DilloHtml *html)
Definition html.cc:423
static void Html_tag_content_map(DilloHtml *html, const char *tag, int tagsize)
Definition html.cc:2311
static void Html_add_anchor(DilloHtml *html, const char *name)
Register an anchor for this page.
Definition html.cc:2671
static void Html_tag_open_span(DilloHtml *html, const char *tag, int tagsize)
Definition html.cc:3382
static void Html_free(void *data)
Definition html.cc:221
static void Html_tag_open_pre(DilloHtml *html, const char *tag, int tagsize)
Definition html.cc:3047
static void Html_tag_open_body(DilloHtml *html, const char *tag, int tagsize)
Definition html.cc:1782
static void Html_test_section(DilloHtml *html, int new_idx, int IsCloseTag)
HTML, HEAD and BODY elements have optional open and close tags.
Definition html.cc:3840
static int Html_set_new_link(DilloHtml *html, DilloUrl **url)
Create a new link, set it as the url's parent and return the index.
Definition html.cc:305
void a_Html_form_display_hiddens(void *v_html, void *v_form, bool_t display)
Used by the "Show/Hide hiddens" form menuitem.
Definition html.cc:278
static bool Html_contains_form(DilloHtml *html, void *v_form)
Search for form.
Definition html.cc:239
static void Html_parse_doctype(DilloHtml *html, const char *tag, int tagsize)
Handle DOCTYPE declaration.
Definition html.cc:1519
static void Html_tag_open_q(DilloHtml *html, const char *tag, int tagsize)
Definition html.cc:2778
DilloUrl * a_Html_url_new(DilloHtml *html, const char *url_str, const char *base_url, int use_base_url)
Wrapper for a_Url_new that adds an error detection message.
Definition html.cc:171
static void Html_tag_content_frame(DilloHtml *html, const char *tag, int tagsize)
Definition html.cc:1939
static int i_TH
Definition html.cc:140
static void Html_display_listitem(DilloHtml *html)
Definition html.cc:3982
static int Html_charref_comp(const void *a, const void *b)
Comparison function for binary search.
Definition html.cc:957
static void Html_tag_open_frame(DilloHtml *html, const char *tag, int tagsize)
Definition html.cc:1914
static const char * Html_parse_entity(DilloHtml *html, const char *token, int toksize, int *entsize, bool_t is_attr)
Given an entity, return the corresponding string.
Definition html.cc:1034
static void Html_tag_open_ul(DilloHtml *html, const char *tag, int tagsize)
Definition html.cc:2804
DilloHtmlTagParsingState
Definition html.cc:76
@ SKIP_VALUE
Definition html.cc:81
@ SEEK_TOKEN_START
Definition html.cc:79
@ MATCH_ATTR_NAME
Definition html.cc:78
@ SEEK_ATTR_START
Definition html.cc:77
@ SEEK_VALUE_START
Definition html.cc:80
@ FINISHED
Definition html.cc:83
@ GET_VALUE
Definition html.cc:82
static void Html_add_textblock(DilloHtml *html, bool addBreaks, int breakSpace, bool addBreakOpt)
Create and add a new Textblock to the current Textblock.
Definition html.cc:392
#define NTAGS
Definition html.cc:3578
static void Html_tag_open_h(DilloHtml *html, const char *tag, int tagsize)
Definition html.cc:1995
static void Html_process_word(DilloHtml *html, const char *word, int size)
Handles putting the word into its proper place.
Definition html.cc:1215
static void Html_tag_open_object(DilloHtml *html, const char *tag, int tagsize)
Definition html.cc:2470
static int i_BUTTON
Definition html.cc:127
void a_Html_tag_set_align_attr(DilloHtml *html, const char *tag, int tagsize)
Evaluates the ALIGN attribute (left|center|right|justify) and sets the style at the top of the stack.
Definition html.cc:317
static int i_DT
Definition html.cc:129
static void Html_tag_open_menu(DilloHtml *html, const char *tag, int tagsize)
Definition html.cc:2853
static int i_SELECT
Definition html.cc:136
static void Html_display_block(DilloHtml *html)
Definition html.cc:3971
static void Html_tag_close_li(DilloHtml *html)
Definition html.cc:2939
static void Html_tag_close_pre(DilloHtml *html)
Definition html.cc:3058
static void Html_tag_content_frameset(DilloHtml *html, const char *tag, int tagsize)
Definition html.cc:1984
static Charref_t * Html_charref_search(char *key)
Binary search of 'key' in charref list.
Definition html.cc:965
static void Html_tag_open_link(DilloHtml *html, const char *tag, int tagsize)
Parse the LINK element (Only CSS stylesheets by now).
Definition html.cc:3295
static void Html_tag_content_br(DilloHtml *html, const char *tag, int tagsize)
Definition html.cc:2007
static void Html_tag_close_media(DilloHtml *html)
Media (AUDIO/VIDEO) close function.
Definition html.cc:2610
static void Html_parse_common_attrs(DilloHtml *html, char *tag, int tagsize)
Parse attributes that can appear on any tag.
Definition html.cc:3894
static const char * Html_parse_numeric_charref(DilloHtml *html, char *tok, bool_t is_attr, int *entsize)
Parse a numeric character reference (e.g., "&#47;" or "&#x2F;").
Definition html.cc:875
static void Html_tag_content_object(DilloHtml *html, const char *tag, int tagsize)
Definition html.cc:2496
static void Html_tag_open_source(DilloHtml *html, const char *tag, int tagsize)
Definition html.cc:2572
static int i_A
Definition html.cc:125
static void Html_tag_close_q(DilloHtml *html)
Definition html.cc:2793
static void Html_tag_close_map(DilloHtml *html)
Handle close <MAP>.
Definition html.cc:2336
static void Html_eventually_pop_dw(DilloHtml *html, bool hand_over_break)
This function is called after popping the stack, to handle nested Textblock widgets.
Definition html.cc:1335
static void Html_tag_open_hr(DilloHtml *html, const char *tag, int tagsize)
Definition html.cc:2948
static CssLength Html_parse_length_or_multi_length(const char *attr, char **endptr)
Definition html.cc:1393
static void Html_process_space_pre_line(DilloHtml *html, const char *space, int spacesize)
For white-space: pre-line, we must break the line if encountering a newline.
Definition html.cc:1107
static void Html_tag_cleanup_at_close(DilloHtml *html, int TagIdx)
Conditional cleanup of the stack, called before closing any tag.
Definition html.cc:3793
static void Html_process_space(DilloHtml *html, const char *space, int spacesize)
Parse spaces.
Definition html.cc:1129
void a_Html_form_submit(void *v_html, void *v_form)
Used by the "Submit form" form menuitem.
Definition html.cc:252
static void Html_tag_open_dd(DilloHtml *html, const char *tag, int tagsize)
Definition html.cc:3039
static void Html_tag_content_hr(DilloHtml *html, const char *tag, int tagsize)
Definition html.cc:3006
static const char * Html_get_attr2(DilloHtml *html, const char *tag, int tagsize, const char *attrname, int tag_parsing_flags)
Get attribute value for 'attrname' and return it.
Definition html.cc:4159
static int i_TD
Definition html.cc:138
static int i_TR
Definition html.cc:139
static void Html_callback(int Op, CacheClient_t *Client)
Dispatch the appropriate function for 'Op'.
Definition html.cc:4295
static void Html_process_tag(DilloHtml *html, char *tag, int tagsize)
Process a tag, given as 'tag' and 'tagsize'.
Definition html.cc:4020
static void Html_tag_close_par(DilloHtml *html)
Default close for paragraph tags - pop the stack and break.
Definition html.cc:3424
void(* TagCloseFunct)(DilloHtml *html)
Definition html.cc:74
static void Html_set_link_coordinates(DilloHtml *html, int link, int x, int y)
Set the URL data for image maps.
Definition html.cc:291
static void Html_tag_content_img(DilloHtml *html, const char *tag, int tagsize)
Create a new Image struct and request the image-url to the cache.
Definition html.cc:2265
static int i_DD
Definition html.cc:128
static void Html_update_content_type(DilloHtml *html, const char *content)
Update the document's content type information based on meta tag data.
Definition html.cc:3090
void(* TagOpenFunct)(DilloHtml *html, const char *tag, int tagsize)
Definition html.cc:73
static void Html_tag_open_default(DilloHtml *html, const char *tag, int tagsize)
Definition html.cc:3374
static void Html_tag_open_embed(DilloHtml *html, const char *tag, int tagsize)
Definition html.cc:2619
static int Html_ms_stupid_quotes_2ucs(int codepoint)
This is M$ non-standard "smart quotes" (w1252).
Definition html.cc:855
#define TAB_SIZE
Definition html.cc:58
static void Html_tag_open_img(DilloHtml *html, const char *tag, int tagsize)
Definition html.cc:2202
void a_Html_load_images(void *v_html, DilloUrl *pattern)
Used by the "Load images" page menuitem.
Definition html.cc:229
static const char * Html_get_javascript_link(DilloHtml *html)
Test and extract the link from a javascript instruction.
Definition html.cc:2648
static void Html_tag_content_source(DilloHtml *html, const char *tag, int tagsize)
Definition html.cc:2600
static int Html_write_raw(DilloHtml *html, char *buf, int bufsize, int Eof)
Here's where we parse the html and put it into the Textblock structure.
Definition html.cc:4311
static int i_OPTION
Definition html.cc:134
static int Html_forbids_cross_nesting(const int InFlags, const int new_idx)
Check nesting and cross-nesting between BUTTON, SELECT, TEXTAREA and A.
Definition html.cc:3672
static void Html_tag_close_html(DilloHtml *html)
Handle close HTML element.
Definition html.cc:1624
static void Html_tag_close_title(DilloHtml *html)
Handle close TITLE.
Definition html.cc:1707
static void Html_tag_open_meta(DilloHtml *html, const char *tag, int tagsize)
Handle META. We do not support http-equiv=refresh with delay>0 because it's non standard,...
Definition html.cc:3116
static int i_LI
Definition html.cc:132
static void Html_tag_open_script(DilloHtml *html, const char *tag, int tagsize)
Handle open SCRIPT.
Definition html.cc:1725
static void Html_tag_open_abbr(DilloHtml *html, const char *tag, int tagsize)
Definition html.cc:2045
static void Html_tag_content_wbr(DilloHtml *html, const char *tag, int tagsize)
Definition html.cc:3432
static void Html_tag_open_sectioning(DilloHtml *html, const char *tag, int tagsize)
Definition html.cc:3399
static void Html_tag_open_dt(DilloHtml *html, const char *tag, int tagsize)
Definition html.cc:3030
static int i_BODY
Definition html.cc:126
void * a_Html_text(const char *type, void *P, CA_Callback_t *Call, void **Data)
Set callback function and callback data for the "html/text" MIME type.
Definition html.cc:210
static bool Html_load_image(BrowserWindow *bw, DilloUrl *url, const DilloUrl *requester, DilloImage *image)
Tell cache to retrieve image.
Definition html.cc:2184
static void Html_force_push_tag(DilloHtml *html, int tag_idx)
Push the tag (used to force an element with optional open into the stack).
Definition html.cc:1365
#define SGML_SPCDEL
Definition html.cc:56
static void Html_tag_open_style(DilloHtml *html, const char *tag, int tagsize)
Handle open STYLE.
Definition html.cc:1743
static const char * Html_parse_named_charref(DilloHtml *html, char *tok, bool_t is_attr, int *entsize)
Parse a named character reference (e.g., "&amp;" or "&hellip;").
Definition html.cc:978
static int Html_check_name_val(DilloHtml *html, const char *val, const char *attrname)
Check that 'val' is composed of characters inside [A-Za-z0-9:_.-].
Definition html.cc:1475
int a_Html_tag_index(const char *tag)
Get 'tag' index.
Definition html.cc:3606
static void Html_tag_open_base(DilloHtml *html, const char *tag, int tagsize)
Set the Document Base URI.
Definition html.cc:3345
static void Html_tag_content_area(DilloHtml *html, const char *tag, int tagsize)
Definition html.cc:2392
static bool Html_match_tag(const char *tagstr, char *tag, int tagsize)
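Does the tag in tagstr (e.g. "p") match the tag in the tag, tagsize structure, with the initial < skipped over (e.g. "P align=center>")?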
Definition html.cc:1317
static void Html_tag_close_body(DilloHtml *html)
Definition html.cc:1888
static void Html_tag_open_audio(DilloHtml *html, const char *tag, int tagsize)
Definition html.cc:2540
static void Html_tag_open_font(DilloHtml *html, const char *tag, int tagsize)
Definition html.cc:2015
static void Html_tag_close_head(DilloHtml *html)
Handle close HEAD element.
Definition html.cc:1661
static int i_TEXTAREA
Definition html.cc:137
static void Html_tag_cleanup_to_idx(DilloHtml *html, int s_idx, int new_idx, int fi, char op)
Cleanup the stack to a given index.
Definition html.cc:3691
static void Html_tag_content_embed(DilloHtml *html, const char *tag, int tagsize)
Definition html.cc:2639
static void Html_tag_close_script(DilloHtml *html)
Handle close SCRIPT.
Definition html.cc:1734
static void Html_display_inline_block(DilloHtml *html)
Definition html.cc:3977
static void Html_stack_cleanup_at_open(DilloHtml *html, int ni)
Conditional cleanup of the stack (at open time).
Definition html.cc:3743
static int i_HTML
Definition html.cc:130
static int Html_tag_pre_excludes(DilloHtml *html, int tag_idx)
Check whether a tag is in the "excluding" element set for PRE.
Definition html.cc:3067
static void Html_tag_open_head(DilloHtml *html, const char *tag, int tagsize)
Handle open HEAD element.
Definition html.cc:1636
static void Html_tag_open_li(DilloHtml *html, const char *tag, int tagsize)
Definition html.cc:2907
static int i_P
Definition html.cc:135
static int Html_tag_compare(const char *p1, const char *p2)
Definition html.cc:3591
static void Html_check_html5_obsolete(DilloHtml *html, int ni)
Warn when encountering elements that are obsolete in HTML5.
Definition html.cc:3948
static void Html_tag_open_p(DilloHtml *html, const char *tag, int tagsize)
Definition html.cc:1902
static void Html_tag_close_style(DilloHtml *html)
Handle close STYLE.
Definition html.cc:1772
static void Html_tag_open_ol(DilloHtml *html, const char *tag, int tagsize)
Definition html.cc:2870
void a_Html_form_reset(void *v_html, void *v_form)
Used by the "Reset form" form menuitem.
Definition html.cc:265
static void Html_tag_open_dl(DilloHtml *html, const char *tag, int tagsize)
Definition html.cc:3020
static void Html_tag_open_div(DilloHtml *html, const char *tag, int tagsize)
Definition html.cc:3415
#define HTML_NTAGS
Definition html.hh:24
#define NumRef
static const Charref_t Charrefs[NumRef]
const char * a_Html_get_attr(DilloHtml *html, const char *tag, int tagsize, const char *attrname)
Call Html_get_attr2 telling it to parse entities and strip the result.
Definition html.cc:4262
void a_Html_stash_init(DilloHtml *html)
Initialize the stash buffer.
Definition html.cc:842