doxygen/misc_8c_source.html

/*

 * File: misc.c

 *

 * Copyright (C) 2000-2007 Jorge Arellano Cid <jcid@dillo.org>,

 *

 * This program is free software; you can redistribute it and/or modify

 * it under the terms of the GNU General Public License as published by

 * the Free Software Foundation; either version 3 of the License, or

 * (at your option) any later version.

 */


#include <stdio.h>

#include <stdlib.h>

#include <string.h>

#include <ctype.h>

#include <assert.h>


#include "utf8.hh"

#include "msg.h"

#include "misc.h"


/*
 * Return a newly allocated copy of 'str' in which some bytes are replaced
 * by a "%XX" sequence (XX = the byte's value as two uppercase hex digits).
 *
 * A byte is escaped when it compares <= 0x1F, equals 0x7F, or occurs in
 * 'esc_set'. Every other byte is copied unchanged.
 *
 * NOTE: the comparison is made on plain 'char'. Where 'char' is signed,
 * bytes >= 0x80 compare as negative and are therefore escaped as well.
 *
 * The caller owns the returned string.
 */
char *a_Misc_escape_chars(const char *str, const char *esc_set)
{
   static const char *const hex = "0123456789ABCDEF";
   Dstr *out = dStr_sized_new(64);
   const char *s;
   char *result;

   for (s = str; *s; ++s) {
      const char c = *s;
      const int must_escape =
         c <= 0x1F || c == 0x7F || strchr(esc_set, c) != NULL;

      if (!must_escape) {
         dStr_append_c(out, c);
         continue;
      }
      dStr_append_c(out, '%');
      dStr_append_c(out, hex[(c >> 4) & 15]);
      dStr_append_c(out, hex[c & 15]);
   }

   /* Keep the character buffer and drop the Dstr itself; passing FALSE is
    * presumably what leaves the buffer alive -- confirm against dlib. */
   result = out->str;
   dStr_free(out, FALSE);

   return result;
}

char *a_Misc_escape_chars(const char *str, const char *esc_set) {…}


#define TAB_SIZE 8

/*
 * Copy UTF-8 text from *start (up to, but not including, 'end') into 'buf',
 * replacing every tab by the spaces needed to reach the next multiple of
 * TAB_SIZE columns. Each non-tab character counts as one column.
 *
 * Copying stops early when the output gets close to 'buflen'; *start is
 * advanced past everything that was consumed, so the caller can call again
 * for the remainder. Combining characters that directly follow the last
 * copied character are appended too, as long as there is room.
 *
 * Return value: number of bytes written to 'buf'. No terminating NUL is
 * written by this function.
 */
int
a_Misc_expand_tabs(char **start, char *end, char *buf, int buflen)
{
   /* Room kept free at the end of 'buf' for trailing combining chars. */
   static const int combining_char_space = 32;
   const int main_limit = buflen - TAB_SIZE - combining_char_space;
   const int tail_limit = buflen - 4;
   int column = 0, written = 0, char_len, k;
   uint_t code;

   while (*start < end && written < main_limit) {
      code = a_Utf8_decode(*start, end, &char_len);

      if (code == '\t') {
         /* Pad with spaces up to the next tab stop. */
         int pad = TAB_SIZE - (column % TAB_SIZE);

         column += pad;
         while (pad-- > 0)
            buf[written++] = ' ';
      } else {
         /* Copy the bytes of this character verbatim. */
         assert(char_len <= 4);
         for (k = 0; k < char_len; k++)
            buf[written++] = (*start)[k];
         column++;
      }

      *start += char_len;
   }

   /* Do not separate a character from the combining characters (e.g.
    * accents) that follow it: append them using the space reserved above.
    * Should there be more of them than fit, the text is split anyway. */
   while (*start < end && written < tail_limit) {
      code = a_Utf8_decode(*start, end, &char_len);
      if (!a_Utf8_combining_char(code))
         break;

      assert(char_len <= 4);
      for (k = 0; k < char_len; k++)
         buf[written++] = (*start)[k];

      *start += char_len;
   }

   return written;
}

a_Misc_expand_tabs(char **start, char *end, char *buf, int buflen) {…}


/* TODO: could use dStr ADT! */

/* A string paired with its length; element type of the MimeTypes table. */
typedef struct {

   const char *str;   /* the string (NULL in the table's end marker) */

   int len;           /* number of characters in str */

} ContentType_t;


/*
 * Known MIME types, each with the length of its string.
 * The table is indexed with DetectedContentType values, so the order of
 * the entries must stay in step with that enum. A { NULL, 0 } entry marks
 * the end of the table.
 */
static const ContentType_t MimeTypes[] = {

   { "application/octet-stream", 24 },

   { "application/xhtml+xml", 21 },

   { "text/html", 9 },

   { "text/plain", 10 },

   { "image/gif", 9 },

   { "image/png", 9 },

   { "image/jpeg", 10 },

   { NULL, 0 }   /* end marker */

};

static const ContentType_t MimeTypes[] = {…};


typedef enum {

   DT_OCTET_STREAM = 0,

   DT_PLACEHOLDER,

   DT_TEXT_HTML,

   DT_TEXT_PLAIN,

   DT_IMAGE_GIF,

   DT_IMAGE_PNG,

   DT_IMAGE_JPG,

} DetectedContentType;

typedef enum {…};


int a_Misc_get_content_type_from_data(void *Data, size_t Size, const char **PT)

{

   size_t i, non_ascci, non_ascci_text, bin_chars;

   char *p = Data;

   int st = 1;      /* default to "doubt' */

   DetectedContentType Type = DT_OCTET_STREAM; /* default to binary */


   /* HTML try */

   for (i = 0; i < Size && dIsspace(p[i]); ++i);

   if ((Size - i >= 5  && !dStrnAsciiCasecmp(p+i, "<html", 5)) ||

       (Size - i >= 5  && !dStrnAsciiCasecmp(p+i, "<head", 5)) ||

       (Size - i >= 6  && !dStrnAsciiCasecmp(p+i, "<title", 6)) ||

       (Size - i >= 14 && !dStrnAsciiCasecmp(p+i, "<!doctype html", 14)) ||

       /* this line is workaround for FTP through the Squid proxy and Doxygen */

       (Size - i >= 9 && !dStrnAsciiCasecmp(p+i, "<!-- HTML", 9))) {


      Type = DT_TEXT_HTML;

      st = 0;

   /* Images */

   } else if (Size >= 4 && !strncmp(p, "GIF8", 4)) {

      Type = DT_IMAGE_GIF;

      st = 0;

   } else if (Size >= 4 && !strncmp(p, "\x89PNG", 4)) {

      Type = DT_IMAGE_PNG;

      st = 0;

   } else if (Size >= 2 && !strncmp(p, "\xff\xd8", 2)) {

      /* JPEG has the first 2 bytes set to 0xffd8 in BigEndian - looking

       * at the character representation should be machine independent. */

      Type = DT_IMAGE_JPG;

      st = 0;


   /* Text */

   } else {

      /* Heuristic for "text/plain"

       * {ASCII, LATIN1, UTF8, KOI8-R, CP-1251}

       * All in the above set regard [00-31] as control characters.

       * LATIN1: [7F-9F] unused

       * CP-1251 {7F,98} unused (two characters).

       *

       * We'll use [0-31] as indicators of non-text content.

       * Better heuristics are welcomed! :-) */


      non_ascci = non_ascci_text = bin_chars = 0;

      Size = MIN (Size, 256);

      for (i = 0; i < Size; i++) {

         int ch = (uchar_t) p[i];

         if (ch < 32 && !dIsspace(ch))

            ++bin_chars;

         if (ch > 126)

            ++non_ascci;

         if (ch > 190)

            ++non_ascci_text;

      }

      if (bin_chars == 0 && (non_ascci - non_ascci_text) <= Size/10) {

         /* Let's say text: if "rare" chars are <= 10% */

         Type = DT_TEXT_PLAIN;

      } else if (Size > 0) {

         /* a special check for UTF-8 */

         Size = a_Utf8_end_of_char(p, Size - 1) + 1;

         if (a_Utf8_test(p, Size) > 0)

            Type = DT_TEXT_PLAIN;

      }

      if (Size >= 256)

         st = 0;

   }


   *PT = MimeTypes[Type].str;

   return st;

}

int a_Misc_get_content_type_from_data(void *Data, size_t Size, const char **PT) {…}


/*
 * Return a pointer to the first character of 's' that cannot be part of a
 * media type token: the end of the string, a non-ASCII or control
 * character, a space, or one of the "tspecials" characters.
 */
static const char *Misc_content_type_token_end(const char *s)
{
   static const char tspecials_space[] = "()<>@,;:\\\"/[]?= ";

   while (*s && d_isascii((uchar_t)*s) && !iscntrl((uchar_t)*s) &&
          !strchr(tspecials_space, *s))
      s++;
   return s;
}

/*
 * Split a Content-Type string such as "text/html; charset=utf-8".
 *
 * type    : the string to parse (may be NULL).
 * major   : if not NULL, receives a new string with the part before '/'.
 * minor   : if not NULL, receives a new string with the part after '/',
 *           or NULL when there is no '/'.
 * charset : if not NULL, receives a new string with the value of the
 *           "charset" parameter, or NULL when none is found. It is only
 *           looked for when 'type' starts with "text/" or with
 *           "application/xhtml+xml".
 *
 * All given output pointers are set to NULL first, so they are NULL when
 * 'type' is NULL. Returned strings are allocated with dStrndup.
 */
void a_Misc_parse_content_type(const char *type, char **major, char **minor,
                               char **charset)
{
   static const char terminators[] = " ;\t";
   static const char key[] = "charset";
   const char *tok, *cur, *hit;
   size_t len;

   if (major)
      *major = NULL;
   if (minor)
      *minor = NULL;
   if (charset)
      *charset = NULL;
   if (!type)
      return;

   /* major type */
   tok = type;
   cur = Misc_content_type_token_end(tok);
   if (major)
      *major = dStrndup(tok, cur - tok);

   /* minor type, if any */
   if (*cur == '/') {
      tok = cur + 1;
      cur = Misc_content_type_token_end(tok);
      if (minor)
         *minor = dStrndup(tok, cur - tok);
   }

   /* Nothing more to do unless a charset is wanted and text follows. */
   if (!charset || !*cur)
      return;

   /* "charset" parameter defined for text media type in RFC 2046,
    * application/xhtml+xml in RFC 3236.
    *
    * Note that RFC 3023 lists some main xml media types and provides
    * the convention of using the "+xml" minor type suffix for other
    * xml types, so it would be reasonable to check for that suffix if
    * we have need to care about various xml types someday.
    */
   if (dStrnAsciiCasecmp(type, "text/", 5) != 0 &&
       dStrnAsciiCasecmp(type, "application/xhtml+xml", 21) != 0)
      return;

   /* The key must start the searched text or follow a terminator. */
   hit = dStriAsciiStr(tok, key);
   if (!hit || (hit != tok && !strchr(terminators, hit[-1])))
      return;

   /* Expect: optional blanks, '=', optional blanks, value. */
   cur = hit + (sizeof(key) - 1);
   while (*cur == ' ' || *cur == '\t')
      ++cur;
   if (*cur != '=')
      return;
   do {
      ++cur;
   } while (*cur == ' ' || *cur == '\t');

   len = strcspn(cur, terminators);
   if (len == 0)
      return;

   if (*cur == '"' && cur[len-1] == '"' && len > 1) {
      /* quoted string: drop the surrounding quotes */
      cur++;
      len -= 2;
   }
   *charset = dStrndup(cur, len);
}

void a_Misc_parse_content_type(const char *type, char **major, char **minor, {…}


/*
 * Compare two Content-Type strings.
 *
 * Both strings are parsed into major type, minor type and charset. They
 * are equal when major and minor types match (ASCII case-insensitively)
 * and the charsets agree. A missing charset agrees with another missing
 * charset and with "UTF-8".
 *
 * Two NULL/empty strings are equal; a NULL/empty string differs from a
 * non-empty one.
 *
 * Return value: 0 when equal, 1 otherwise.
 */
int a_Misc_content_type_cmp(const char *ct1, const char *ct2)
{
   char *major1, *major2, *minor1, *minor2, *charset1, *charset2;
   const int empty1 = (!ct1 || !*ct1);
   const int empty2 = (!ct2 || !*ct2);
   int same_type, same_charset;

   if (empty1 || empty2)
      return (empty1 && empty2) ? 0 : 1;

   a_Misc_parse_content_type(ct1, &major1, &minor1, &charset1);
   a_Misc_parse_content_type(ct2, &major2, &minor2, &charset2);

   same_type =
      major1 && major2 && dStrAsciiCasecmp(major1, major2) == 0 &&
      minor1 && minor2 && dStrAsciiCasecmp(minor1, minor2) == 0;

   same_charset = 0;
   if (same_type) {
      if (charset1 && charset2) {
         same_charset = (dStrAsciiCasecmp(charset1, charset2) == 0);
      } else if (!charset1 && !charset2) {
         same_charset = 1;
      } else {
         /* Only one side names a charset: it has to be UTF-8. */
         const char *given = charset1 ? charset1 : charset2;

         same_charset = (dStrAsciiCasecmp(given, "UTF-8") == 0);
      }
   }

   dFree(major1);
   dFree(major2);
   dFree(minor1);
   dFree(minor2);
   dFree(charset1);
   dFree(charset2);

   return (same_type && same_charset) ? 0 : 1;
}

int a_Misc_content_type_cmp(const char *ct1, const char *ct2) {…}


/*
 * Check the server-supplied Content-Type against the detected one.
 *
 * EntryType    : type sent by the server (may be NULL).
 * DetectedType : type detected from the data.
 *
 * Return value: 0 when the types are compatible, -1 on mismatch.
 *
 * There is no mismatch when:
 *  - the server sent no type,
 *  - the server type is not one of MimeTypes[1..],
 *  - it is an image type and the detected type matches it exactly,
 *  - it is "application/xhtml+xml" and the data was detected as "text/html",
 *  - it is not "text/...", or the detected type is not "application/...".
 * The remaining case, application data sent as text, is a mismatch.
 */
int a_Misc_content_type_check(const char *EntryType, const char *DetectedType)
{
   int i;
   int st = -1;

   _MSG("Type check:  [Srv: %s  Det: %s]\n", EntryType, DetectedType);

   if (!EntryType)
      return 0; /* there's no mismatch without server type */

   /* Index 0 (application/octet-stream) is deliberately not searched. */
   for (i = 1; MimeTypes[i].str; ++i)
      if (dStrnAsciiCasecmp(EntryType, MimeTypes[i].str, MimeTypes[i].len) ==0)
         break;

   if (!MimeTypes[i].str) {
      /* type not found, no mismatch */
      st = 0;
   } else if (dStrnAsciiCasecmp(EntryType, "image/", 6) == 0 &&
              dStrnAsciiCasecmp(DetectedType, MimeTypes[i].str,
                                MimeTypes[i].len) == 0) {
      /* An image, and there's an exact match */
      st = 0;
   } else if (dStrnAsciiCasecmp(EntryType, "application/xhtml+xml", 21) == 0 &&
              dStrnAsciiCasecmp(DetectedType, "text/html", 9) == 0) {
      /* XML version of HTML.
       * This used to be tested last with both comparisons as "!= 0"; at
       * that point the test was always true, so no mismatch was ever
       * reported. */
      st = 0;
   } else if (dStrnAsciiCasecmp(EntryType, "text/", 5) != 0 ||
              dStrnAsciiCasecmp(DetectedType, "application/", 12) != 0) {
      /* Not an application sent as text */
      st = 0;
   }
   /* Otherwise: application data sent as text, st stays -1. */

   _MSG("Type check: %s\n", st == 0 ? "MATCH" : "MISMATCH");

   return st;
}

int a_Misc_content_type_check(const char *EntryType, const char *DetectedType) {…}


/*
 * Parse a geometry string: "<width>x<height>" (or 'X'), optionally followed
 * by two signed offsets, e.g. "640x480+10+20".
 *
 * str        : the string to parse.
 * w, h       : receive width and height when both numbers are present.
 * x, y       : receive the offsets when both are present after the size.
 *
 * Outputs that could not be parsed are left untouched.
 *
 * Return value: 1 when width and height were parsed, 0 otherwise.
 */
int a_Misc_parse_geometry(char *str, int *x, int *y, int *w, int *h)
{
   char *sep, *height_str;
   char *end_w, *end_h, *end_x, *end_y;
   int width, height, xoff, yoff;
   int ret = 0;

   /* A lowercase 'x' is preferred as separator, then an uppercase one. */
   sep = strchr(str, 'x');
   if (!sep)
      sep = strchr(str, 'X');

   if (sep) {
      height_str = sep + 1;
      width = strtol(str, &end_w, 10);
      height = strtol(height_str, &end_h, 10);

      /* Both conversions must have consumed at least one character. */
      if (end_w != str && end_h != height_str) {
         *w = width;
         *h = height;
         ret = 1;

         /* parse x,y now: they follow the height, each with its sign */
         xoff = strtol(end_h, &end_x, 10);
         yoff = strtol(end_x, &end_y, 10);
         if (end_x != end_h && end_y != end_x) {
            *x = xoff;
            *y = yoff;
         }
      }
   }

   _MSG("geom: w,h,x,y = (%d,%d,%d,%d)\n", *w, *h, *x, *y);
   return ret;
}

int a_Misc_parse_geometry(char *str, int *x, int *y, int *w, int *h) {…}


/*
 * Parse a search_url string of the form "[<label> ]<url>".
 *
 * source : the string to parse.
 * label  : receives a pointer to a static buffer with the label (at most
 *          31 characters). Without an explicit label, the text between
 *          the second '/' and the next '/' of the URL is used. The buffer
 *          is overwritten by the next call.
 * urlstr : on success, receives a pointer into 'source' at the URL.
 *
 * The URL is accepted when it has a '/', a character after it, and
 * another '/' further on (as in "scheme://host/...").
 *
 * Return value: 0 on success, -1 on error (*label is set in both cases,
 * *urlstr only on success).
 */
int a_Misc_parse_search_url(char *source, char **label, char **urlstr)
{
   static char buf[32];
   char *space, *slash, *host_end;
   size_t n;
   int ret = -1;

   space = strrchr(source, ' ');
   if (space) {
      /* label and url pair: the label is all that precedes the last space */
      n = (size_t)(space - source);
      if (n > sizeof(buf) - 1)
         n = sizeof(buf) - 1;
      memcpy(buf, source, n);
      buf[n] = 0;

      source = space + 1;
      slash = strchr(source, '/');
      if (slash && slash[1] && strchr(slash + 2, '/')) {
         *urlstr = source;
         ret = 0;
      }
   } else {
      /* url only, make a custom label out of the URL itself */
      slash = strchr(source, '/');
      if (slash && slash[1] && (host_end = strchr(slash + 2, '/'))) {
         n = (size_t)(host_end - slash - 2);
         if (n > sizeof(buf) - 1)
            n = sizeof(buf) - 1;
         memcpy(buf, slash + 2, n);
         buf[n] = 0;

         *urlstr = source;
         ret = 0;
      }
   }

   *label = buf;
   if (ret == -1)
      MSG("Invalid search_url: \"%s\"\n", source);
   return ret;
}

int a_Misc_parse_search_url(char *source, char **label, char **urlstr) {…}


/*
 * Encode a NUL-terminated string using base64, with '=' padding.
 *
 * in : the string to encode (may be NULL).
 *
 * Return value: a new string allocated with dMalloc holding the encoded
 * text, or NULL when 'in' is NULL. The caller owns the returned string.
 */
char *a_Misc_encode_base64(const char *in)
{
   static const char *const base64_hex = "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
                                         "abcdefghijklmnopqrstuvwxyz"
                                         "0123456789+/";
   const unsigned char *src;
   char *out = NULL;
   size_t len, i = 0;

   if (in == NULL)
      return NULL;

   /* Read the input as unsigned bytes: with a signed 'char', bytes >= 0x80
    * would give negative shift results and index outside base64_hex. */
   src = (const unsigned char *)in;
   len = strlen(in);

   /* Four output characters per (possibly partial) group of three input
    * bytes, plus the terminating NUL. */
   out = (char *)dMalloc((len + 2) / 3 * 4 + 1);

   /* Complete groups of three bytes */
   for (; len >= 3; len -= 3, src += 3) {
      out[i++] = base64_hex[src[0] >> 2];
      out[i++] = base64_hex[((src[0] << 4) & 0x30) | (src[1] >> 4)];
      out[i++] = base64_hex[((src[1] << 2) & 0x3c) | (src[2] >> 6)];
      out[i++] = base64_hex[src[2] & 0x3f];
   }

   /* One or two bytes left over: encode them and pad with '=' */
   if (len > 0) {
      unsigned int fragment = (src[0] << 4) & 0x30;

      out[i++] = base64_hex[src[0] >> 2];
      if (len > 1)
         fragment |= src[1] >> 4;
      out[i++] = base64_hex[fragment];
      out[i++] = (len < 2) ? '=' : base64_hex[(src[1] << 2) & 0x3c];
      out[i++] = '=';
   }
   out[i] = '\0';

   return out;
}

char *a_Misc_encode_base64(const char *in) {…}


/*
 * Load a local file into a new Dstr.
 *
 * filename : path of the file; it is opened with mode "r".
 *
 * Return value: a new Dstr with the file's contents, or NULL when the file
 * cannot be opened. Reading stops at the first fread() that returns no
 * data; whatever was read up to then is returned.
 */
Dstr *a_Misc_file2dstr(const char *filename)
{
   char chunk[4096];
   Dstr *contents;
   FILE *fp;
   int got;

   fp = fopen(filename, "r");
   if (!fp)
      return NULL;

   contents = dStr_sized_new(4096);
   for (;;) {
      got = fread(chunk, 1, sizeof(chunk), fp);
      if (got <= 0)
         break;
      dStr_append_l(contents, chunk, got);
   }
   fclose(fp);

   return contents;
}

Dstr *a_Misc_file2dstr(const char *filename) {…}

_MSG
#define _MSG(...)
Definition bookmarks.c:45

MSG
#define MSG(...)
Definition bookmarks.c:46

uchar_t
unsigned char uchar_t
Definition d_size.h:17

uint_t
unsigned int uint_t
Definition d_size.h:20

dFree
void dFree(void *mem)
Definition dlib.c:68

dStrAsciiCasecmp
int dStrAsciiCasecmp(const char *s1, const char *s2)
Definition dlib.c:203

dStr_sized_new
Dstr * dStr_sized_new(int sz)
Create a new string with a given size.
Definition dlib.c:254

dStrnAsciiCasecmp
int dStrnAsciiCasecmp(const char *s1, const char *s2, size_t n)
Definition dlib.c:215

dMalloc
void * dMalloc(size_t size)
Definition dlib.c:45

dStr_free
void dStr_free(Dstr *ds, int all)
Free a dillo string.
Definition dlib.c:337

dStriAsciiStr
char * dStriAsciiStr(const char *haystack, const char *needle)
Case insensitive strstr.
Definition dlib.c:184

dStr_append_l
void dStr_append_l(Dstr *ds, const char *s, int l)
Append a C string to a Dstr (providing length).
Definition dlib.c:308

dStr_append_c
void dStr_append_c(Dstr *ds, int c)
Append one character.
Definition dlib.c:349

dStrndup
char * dStrndup(const char *s, size_t sz)
Definition dlib.c:88

MIN
#define MIN(a, b)
Definition dlib.h:30

dIsspace
#define dIsspace(c)
Definition dlib.h:33

FALSE
#define FALSE
Definition dlib.h:19

a_Misc_parse_search_url
int a_Misc_parse_search_url(char *source, char **label, char **urlstr)
Parse dillorc's search_url string ([<label> ]<url>) Return value: -1 on error, 0 on success (and labe...
Definition misc.c:391

a_Misc_content_type_check
int a_Misc_content_type_check(const char *EntryType, const char *DetectedType)
Check the server-supplied 'Content-Type' against our detected type.
Definition misc.c:321

a_Misc_expand_tabs
int a_Misc_expand_tabs(char **start, char *end, char *buf, int buflen)
Takes a string and converts any tabs to spaces.
Definition misc.c:54

a_Misc_file2dstr
Dstr * a_Misc_file2dstr(const char *filename)
Load a local file into a dStr.
Definition misc.c:464

a_Misc_parse_geometry
int a_Misc_parse_geometry(char *str, int *x, int *y, int *w, int *h)
Parse a geometry string.
Definition misc.c:360

a_Misc_encode_base64
char * a_Misc_encode_base64(const char *in)
Encodes string using base64 encoding.
Definition misc.c:425

DetectedContentType
DetectedContentType
Definition misc.c:116

DT_TEXT_PLAIN
@ DT_TEXT_PLAIN
Definition misc.c:120

DT_OCTET_STREAM
@ DT_OCTET_STREAM
Definition misc.c:117

DT_IMAGE_GIF
@ DT_IMAGE_GIF
Definition misc.c:121

DT_IMAGE_PNG
@ DT_IMAGE_PNG
Definition misc.c:122

DT_TEXT_HTML
@ DT_TEXT_HTML
Definition misc.c:119

DT_PLACEHOLDER
@ DT_PLACEHOLDER
Definition misc.c:118

DT_IMAGE_JPG
@ DT_IMAGE_JPG
Definition misc.c:123

MimeTypes
static const ContentType_t MimeTypes[]
Definition misc.c:105

TAB_SIZE
#define TAB_SIZE
Definition misc.c:49

a_Misc_escape_chars
char * a_Misc_escape_chars(const char *str, const char *esc_set)
Escape characters as %XX sequences.
Definition misc.c:26

a_Misc_content_type_cmp
int a_Misc_content_type_cmp(const char *ct1, const char *ct2)
Compare two Content-Type strings.
Definition misc.c:274

a_Misc_get_content_type_from_data
int a_Misc_get_content_type_from_data(void *Data, size_t Size, const char **PT)
Detects 'Content-Type' from a data stream sample.
Definition misc.c:136

a_Misc_parse_content_type
void a_Misc_parse_content_type(const char *type, char **major, char **minor, char **charset)
Parse Content-Type string, e.g., "text/html; charset=utf-8".
Definition misc.c:210

misc.h

d_isascii
#define d_isascii(c)
Definition misc.h:11

msg.h

Dstr
Definition dlib.h:102

Dstr::str
Dstr_char_t * str
Definition dlib.h:105

a_Utf8_decode
uint_t a_Utf8_decode(const char *str, const char *end, int *len)
Decode a single UTF-8-encoded character starting at p.
Definition utf8.cc:46

a_Utf8_test
int a_Utf8_test(const char *src, unsigned int srclen)
Examine first srclen bytes of src.
Definition utf8.cc:64

a_Utf8_end_of_char
uint_t a_Utf8_end_of_char(const char *str, uint_t i)
Return index of the last byte of the UTF-8-encoded character that str + i points to or into.
Definition utf8.cc:23

a_Utf8_combining_char
bool_t a_Utf8_combining_char(int unicode)
Definition utf8.cc:96

utf8.hh