upstream/extra-patches/assumed-charset

   1 # vi: ft=diff
   2 This is the assumed_charset patch by Takashi TAKIZAWA <taki@cyber.email.ne.jp>.
   3
   4 The home page for this patch is:
   5
   6   http://www.emaillab.org/mutt/download15.html.en
   7
   8 * Patch last synced with upstream:
   9   - Date: 2004-07-22
  10   - File: http://www.emaillab.org/mutt/1.5/patch-1.5.6.tt.assumed_charset.1.gz
  11
  12 * Changes made: NONE.
  13
  14 * Notes:
   15   - The adjust_line and adjust_edited_file patches found on the above
   16     page were formerly applied to the Debian package too, but as of
   17     1.5.8 they are merged upstream.
  18
  19 == END PATCH
  20 diff -uNr mutt-1.5.6.orig/charset.c mutt-1.5.6/charset.c
  21 --- mutt-1.5.6.orig/charset.c   Tue Jan 21 21:25:21 2003
  22 +++ mutt-1.5.6/charset.c        Sun Feb 15 15:13:26 2004
  23 @@ -581,3 +581,86 @@
  24      iconv_close (fc->cd);
  25    FREE (_fc);
  26  }
   27 +/* Return the first entry of the colon-separated list `charset', or "us-ascii" when mutt_strlen() reports it empty. */
   28 +char *mutt_get_first_charset (const char *charset)
   29 +{
   30 +  static char fcharset[SHORT_STRING]; /* static: every call that copies an entry reuses this one buffer */
   31 +  const char *c, *c1;
   32 +
   33 +  c = charset;
   34 +  if (!mutt_strlen(c)) /* presumably also covers a NULL list -- confirm against mutt_strlen */
   35 +    return "us-ascii";
   36 +  if (!(c1 = strchr (c, ':'))) /* no separator: hand back the caller's own string, not a copy */
   37 +    return charset;
   38 +  strfcpy (fcharset, c, c1 - c + 1); /* NOTE(review): size is entry length + 1, not bounded by sizeof (fcharset) -- confirm entries stay below SHORT_STRING */
   39 +  return fcharset;
   40 +}
   41 +/* Convert the flen bytes at f from charset `from' to `to'.  On success *t gets a new NUL-terminated buffer, *tlen its length, and iconv()'s result is returned; on failure returns (size_t)(-1) and leaves *t and *tlen untouched. */
   42 +static size_t convert_string (ICONV_CONST char *f, size_t flen,
   43 +                             const char *from, const char *to,
   44 +                             char **t, size_t *tlen)
   45 +{
   46 +  iconv_t cd;
   47 +  char *buf, *ob;
   48 +  size_t obl, n;
   49 +  int e;
   50 +
   51 +  cd = mutt_iconv_open (to, from, 0);
   52 +  if (cd == (iconv_t)(-1))
   53 +    return (size_t)(-1);
   54 +  obl = 4 * flen + 1; /* output buffer size: four bytes per input byte, plus one */
   55 +  ob = buf = safe_malloc (obl);
   56 +  n = iconv (cd, &f, &flen, &ob, &obl);
   57 +  if (n == (size_t)(-1) || iconv (cd, 0, 0, &ob, &obl) == (size_t)(-1)) /* the second call (null input) emits any pending shift sequence */
   58 +  {
   59 +    e = errno; /* save errno so the cleanup calls below cannot clobber it */
   60 +    FREE (&buf);
   61 +    iconv_close (cd);
   62 +    errno = e;
   63 +    return (size_t)(-1);
   64 +  }
   65 +  *ob = '\0';
   66 +
   67 +  *tlen = ob - buf; /* converted length, terminator excluded */
   68 +
   69 +  safe_realloc ((void **) &buf, ob - buf + 1); /* resize to the bytes actually written plus the terminator */
   70 +  *t = buf; /* the buffer is handed to the caller */
   71 +  iconv_close (cd);
   72 +
   73 +  return n;
   74 +}
   75 +/* Try each entry of the colon-separated AssumedCharset list in order as the source charset and convert *ps to Charset; the first success replaces *ps.  Returns 0 on success, -1 if no entry converts. */
   76 +int mutt_convert_nonmime_string (char **ps)
   77 +{
   78 +  const char *c, *c1;
   79 +
   80 +  for (c = AssumedCharset; c; c = c1 ? c1 + 1 : 0) /* advance past each ':'; the loop ends after the last entry */
   81 +  {
   82 +    char *u = *ps;
   83 +    char *s;
   84 +    char *fromcode;
   85 +    size_t m, n;
   86 +    size_t ulen = mutt_strlen (*ps);
   87 +    size_t slen;
   88 +
   89 +    if (!u || !*u)
   90 +      return 0; /* NULL or empty string: nothing to convert */
   91 +
   92 +    c1 = strchr (c, ':');
   93 +    n = c1 ? c1 - c : mutt_strlen (c); /* length of the current list entry */
   94 +    if (!n)
   95 +      continue; /* skip an empty entry, e.g. the middle of "a::b" */
   96 +    fromcode = safe_malloc (n + 1);
   97 +    strfcpy (fromcode, c, n + 1); /* private NUL-terminated copy of the entry name */
   98 +    m = convert_string (u, ulen, fromcode, Charset, &s, &slen);
   99 +    FREE (&fromcode);
  100 +    if (m != (size_t)(-1))
  101 +    {
  102 +      FREE (ps); /* release the old string before installing the converted one */
  103 +      *ps = s;
  104 +      return 0;
  105 +    }
  106 +  }
  107 +  return -1;
  108 +}
 109 +
 110 diff -uNr mutt-1.5.6.orig/charset.h mutt-1.5.6/charset.h
 111 --- mutt-1.5.6.orig/charset.h   Tue Mar  4 16:49:43 2003
 112 +++ mutt-1.5.6/charset.h        Sun Feb 15 15:06:19 2004
 113 @@ -35,6 +35,8 @@
 114  #endif
 115
 116  int mutt_convert_string (char **, const char *, const char *, int);
 117 +char *mutt_get_first_charset (const char *);
 118 +int mutt_convert_nonmime_string (char **);
 119
 120  iconv_t mutt_iconv_open (const char *, const char *, int);
 121  size_t mutt_iconv (iconv_t, ICONV_CONST char **, size_t *, char **, size_t *, ICONV_CONST char **, const char *);
 122 diff -uNr mutt-1.5.6.orig/globals.h mutt-1.5.6/globals.h
 123 --- mutt-1.5.6.orig/globals.h   Mon Feb  2 02:15:17 2004
 124 +++ mutt-1.5.6/globals.h        Sun Feb 15 15:06:19 2004
 125 @@ -32,6 +32,7 @@
 126
 127  WHERE char *AliasFile;
 128  WHERE char *AliasFmt;
 129 +WHERE char *AssumedCharset;
 130  WHERE char *AttachSep;
 131  WHERE char *Attribution;
 132  WHERE char *AttachFormat;
 133 @@ -45,6 +46,7 @@
 134  WHERE char *DsnReturn;
 135  WHERE char *Editor;
 136  WHERE char *EscChar;
 137 +WHERE char *FileCharset;
 138  WHERE char *FolderFormat;
 139  WHERE char *ForwFmt;
 140  WHERE char *Fqdn;
 141 diff -uNr mutt-1.5.6.orig/handler.c mutt-1.5.6/handler.c
 142 --- mutt-1.5.6.orig/handler.c   Wed Nov  5 18:41:31 2003
 143 +++ mutt-1.5.6/handler.c        Sun Feb 15 15:06:19 2004
 144 @@ -1718,11 +1718,21 @@
 145    int istext = mutt_is_text_part (b);
 146    iconv_t cd = (iconv_t)(-1);
 147
 148 -  if (istext && s->flags & M_CHARCONV)
 149 +  if (istext)
 150    {
 151 -    char *charset = mutt_get_parameter ("charset", b->parameter);
 152 -    if (charset && Charset)
 153 -      cd = mutt_iconv_open (Charset, charset, M_ICONV_HOOK_FROM);
 154 +    if(s->flags & M_CHARCONV)
 155 +    {
 156 +      char *charset = mutt_get_parameter ("charset", b->parameter);
 157 +      if (!option (OPTSTRICTMIME) && !charset)
 158 +        charset = mutt_get_first_charset (AssumedCharset);
 159 +      if (charset && Charset)
 160 +        cd = mutt_iconv_open (Charset, charset, M_ICONV_HOOK_FROM);
 161 +    }
 162 +    else
 163 +    {
 164 +      if (b->file_charset)
 165 +        cd = mutt_iconv_open (Charset, b->file_charset, M_ICONV_HOOK_FROM);
 166 +    }
 167    }
 168
 169    fseek (s->fpin, b->offset, 0);
 170 diff -uNr mutt-1.5.6.orig/init.h mutt-1.5.6/init.h
 171 --- mutt-1.5.6.orig/init.h      Mon Feb  2 02:15:17 2004
 172 +++ mutt-1.5.6/init.h   Sun Feb 15 15:07:40 2004
 173 @@ -184,6 +184,23 @@
 174    ** If set, Mutt will prompt you for carbon-copy (Cc) recipients before
 175    ** editing the body of an outgoing message.
 176    */
 177 +  { "assumed_charset", DT_STR, R_NONE, UL &AssumedCharset, UL "us-ascii"},
 178 +  /*
 179 +  ** .pp
 180 +  ** This variable is a colon-separated list of character encoding
 181 +  ** schemes for messages without character encoding indication.
  182 +  ** Header field values and message body content without character encoding
  183 +  ** indication are assumed to be written in one of the encodings in this list.
 184 +  ** By default, all the header fields and message body without any charset
 185 +  ** indication are assumed to be in "us-ascii".
 186 +  ** .pp
 187 +  ** For example, Japanese users might prefer this:
 188 +  ** .pp
 189 +  **   set assumed_charset="iso-2022-jp:euc-jp:shift_jis:utf-8"
 190 +  ** .pp
  191 +  ** However, only the first entry is used for the message body.
  192 +  ** This variable takes effect only if $$strict_mime is unset.
 193 +  */
 194    { "attach_format",   DT_STR,  R_NONE, UL &AttachFormat, UL "%u%D%I %t%4n %T%.40d%> [%.7m/%.10M, %.6e%?C?, %C?, %s] " },
 195    /*
 196    ** .pp
 197 @@ -532,6 +549,20 @@
 198    ** signed.
 199    ** (PGP only)
 200    */
 201 +  { "file_charset",    DT_STR,  R_NONE, UL &FileCharset, UL 0 },
 202 +  /*
 203 +  ** .pp
 204 +  ** This variable is a colon-separated list of character encoding
  205 +  ** schemes for text file attachments.
 206 +  ** If unset, $$charset value will be used instead.
 207 +  ** For example, the following configuration would work for Japanese
 208 +  ** text handling:
 209 +  ** .pp
 210 +  **   set file_charset="iso-2022-jp:euc-jp:shift_jis:utf-8"
 211 +  ** .pp
 212 +  ** Note: "iso-2022-*" must be put at the head of the value as shown above
 213 +  ** if included.
 214 +  */
 215    { "folder",          DT_PATH, R_NONE, UL &Maildir, UL "~/Mail" },
 216    /*
 217    ** .pp
 218 @@ -2476,6 +2507,19 @@
 219    ** Setting this variable causes the ``status bar'' to be displayed on
 220    ** the first line of the screen rather than near the bottom.
 221    */
 222 +  { "strict_mime",    DT_BOOL, R_NONE, OPTSTRICTMIME, 1 },
 223 +  /*
 224 +  ** .pp
  225 +  ** When unset, non MIME-compliant messages that don't have any
 226 +  ** charset indication in ``Content-Type'' field can be displayed
 227 +  ** (non MIME-compliant messages are often generated by old mailers
 228 +  ** or buggy mailers like MS Outlook Express).
 229 +  ** See also $$assumed_charset.
 230 +  ** .pp
  231 +  ** When unset, linear-white-space between an encoded-word and *text is
  232 +  ** also replaced with a single space, to prevent a MIME-encoded
  233 +  ** ``Subject'' field from being divided into multiple lines on display.
 234 +  */
 235    { "strict_threads",  DT_BOOL, R_RESORT|R_RESORT_INIT|R_INDEX, OPTSTRICTTHREADS, 0 },
 236    /*
 237    ** .pp
 238 diff -uNr mutt-1.5.6.orig/mutt.h mutt-1.5.6/mutt.h
 239 --- mutt-1.5.6.orig/mutt.h      Mon Feb  2 02:15:17 2004
 240 +++ mutt-1.5.6/mutt.h   Sun Feb 15 15:06:19 2004
 241 @@ -406,6 +406,7 @@
 242    OPTSIGONTOP,
 243    OPTSORTRE,
 244    OPTSTATUSONTOP,
 245 +  OPTSTRICTMIME,
 246    OPTSTRICTTHREADS,
 247    OPTSUSPEND,
 248    OPTTEXTFLOWED,
 249 @@ -599,6 +600,7 @@
 250                                  * If NULL, filename is used
 251                                  * instead.
 252                                  */
 253 +  char *file_charset;           /* charset of attached file */
 254    CONTENT *content;             /* structure used to store detailed info about
 255                                  * the content of the attachment.  this is used
 256                                  * to determine what content-transfer-encoding
 257 diff -uNr mutt-1.5.6.orig/parse.c mutt-1.5.6/parse.c
 258 --- mutt-1.5.6.orig/parse.c     Wed Nov  5 18:41:33 2003
 259 +++ mutt-1.5.6/parse.c  Sun Feb 15 15:06:19 2004
 260 @@ -208,9 +208,23 @@
 261
 262        if (*s == '"')
 263        {
 264 +        int state_ascii = 1;
 265         s++;
 266 -       for (i=0; *s && *s != '"' && i < sizeof (buffer) - 1; i++, s++)
 267 +       for (i=0; *s && i < sizeof (buffer) - 1; i++, s++)
 268         {
 269 +         if (!option (OPTSTRICTMIME)) {
  270 +            /* iso-2022-* text can contain a '"' byte while in a non-ascii
  271 +            * state; such a byte does not end the quoted string. */
 272 +            if (*s == 0x1b && i < sizeof (buffer) - 2)
 273 +            {
 274 +              if (s[1] == '(' && (s[2] == 'B' || s[2] == 'J'))
 275 +                state_ascii = 1;
 276 +              else
 277 +                state_ascii = 0;
 278 +            }
 279 +          }
 280 +          if (state_ascii && *s == '"')
 281 +            break;
 282           if (*s == '\\')
 283           {
 284             /* Quote the next character */
 285 @@ -379,7 +393,9 @@
 286    if (ct->type == TYPETEXT)
 287    {
 288      if (!(pc = mutt_get_parameter ("charset", ct->parameter)))
 289 -      mutt_set_parameter ("charset", "us-ascii", &ct->parameter);
 290 +      mutt_set_parameter ("charset", option (OPTSTRICTMIME) ? "us-ascii" :
 291 +                         (const char *) mutt_get_first_charset (AssumedCharset),
 292 +                         &ct->parameter);
 293    }
 294
 295  }
 296 diff -uNr mutt-1.5.6.orig/rfc2047.c mutt-1.5.6/rfc2047.c
 297 --- mutt-1.5.6.orig/rfc2047.c   Wed Nov  5 18:41:33 2003
 298 +++ mutt-1.5.6/rfc2047.c        Sun Feb 15 15:13:58 2004
 299 @@ -706,13 +706,54 @@
 300    return 0;
 301  }
 302
  303 +/* return length of the linear white space at the start of s[0..n); 0 if that run ends with CR or LF */
  304 +static size_t lwslen (const char *s, size_t n)
  305 +{
  306 +  const char *p = s;
  307 +  size_t len = n; /* stays n when every byte in the span is white space */
  308 +
  309 +  if (n <= 0)
  310 +    return 0;
  311 +
  312 +  for (; p < s + n; p++)
  313 +    if (!strchr (" \t\r\n", *p)) /* first non-white-space byte ends the run */
  314 +    {
  315 +      len = (size_t)(p - s);
  316 +      break;
  317 +    }
  318 +  if (strchr ("\r\n", *(p-1))) /* LWS doesn't end with CRLF; NOTE(review): this reads s[-1] when s[0] is not white space (p == s) */
  319 +    len = (size_t)0;
  320 +  return len;
  321 +}
 322 +
  323 +/* return length of the linear white space at the end of s[0..n), scanning in reverse; 0 if the last byte is CR or LF */
  324 +static size_t lwsrlen (const char *s, size_t n)
  325 +{
  326 +  const char *p = s + n - 1; /* start at the last byte of the span */
  327 +  size_t len = n; /* stays n when every byte in the span is white space */
  328 +
  329 +  if (n <= 0)
  330 +    return 0;
  331 +
  332 +  if (strchr ("\r\n", *p)) /* LWS doesn't end with CRLF */
  333 +    return (size_t)0;
  334 +
  335 +  for (; p >= s; p--)
  336 +    if (!strchr (" \t\r\n", *p)) /* last non-white-space byte bounds the trailing run */
  337 +    {
  338 +      len = (size_t)(s + n - 1 - p); /* number of bytes after p */
  339 +      break;
  340 +    }
  341 +  return len;
  342 +}
 343 +
 344  /* try to decode anything that looks like a valid RFC2047 encoded
 345   * header field, ignoring RFC822 parsing rules
 346   */
 347  void rfc2047_decode (char **pd)
 348  {
 349    const char *p, *q;
 350 -  size_t n;
 351 +  size_t m, n;
 352    int found_encoded = 0;
 353    char *d0, *d;
 354    const char *s = *pd;
 355 @@ -729,6 +770,37 @@
 356      if (!(p = find_encoded_word (s, &q)))
 357      {
 358        /* no encoded words */
 359 +      if (!option (OPTSTRICTMIME))
 360 +      {
 361 +        n = mutt_strlen (s);
 362 +        if (found_encoded && (m = lwslen (s, n)) != 0)
 363 +        {
 364 +          if (m != n)
 365 +            *d = ' ', d++, dlen--;
 366 +          n -= m, s += m;
 367 +        }
 368 +        if (ascii_strcasecmp (AssumedCharset, "us-ascii"))
 369 +        {
 370 +          char *t;
 371 +          size_t tlen;
 372 +
 373 +          t = safe_malloc (n + 1);
 374 +          strfcpy (t, s, n + 1);
 375 +          if (mutt_convert_nonmime_string (&t) == 0)
 376 +          {
 377 +            tlen = mutt_strlen (t);
 378 +            strncpy (d, t, tlen);
 379 +            d += tlen;
 380 +          }
 381 +          else
 382 +          {
 383 +            strncpy (d, s, n);
 384 +            d += n;
 385 +          }
 386 +          FREE (&t);
 387 +          break;
 388 +        }
 389 +      }
 390        strncpy (d, s, dlen);
 391        d += dlen;
 392        break;
 393 @@ -737,15 +809,37 @@
 394      if (p != s)
 395      {
 396        n = (size_t) (p - s);
 397 -      /* ignore spaces between encoded words */
 398 -      if (!found_encoded || strspn (s, " \t\r\n") != n)
 399 +      /* ignore spaces between encoded words
 400 +       * and linear white spaces between encoded word and *text */
 401 +      if (!option (OPTSTRICTMIME))
 402        {
 403 -       if (n > dlen)
 404 -         n = dlen;
 405 -       memcpy (d, s, n);
 406 -       d += n;
 407 -       dlen -= n;
 408 +        if (found_encoded && (m = lwslen (s, n)) != 0)
 409 +        {
 410 +          if (m != n)
 411 +            *d = ' ', d++, dlen--;
 412 +          n -= m, s += m;
 413 +        }
 414 +
 415 +        if ((m = n - lwsrlen (s, n)) != 0)
 416 +        {
 417 +          if (m > dlen)
 418 +            m = dlen;
 419 +          memcpy (d, s, m);
 420 +          d += m;
 421 +          dlen -= m;
 422 +          if (m != n)
 423 +            *d = ' ', d++, dlen--;
 424 +        }
 425        }
 426 +      else if (!found_encoded || strspn (s, " \t\r\n") != n)
 427 +      {
 428 +        if (n > dlen)
 429 +          n = dlen;
 430 +        memcpy (d, s, n);
 431 +        d += n;
 432 +        dlen -= n;
 433 +      }
 434 +
 435      }
 436
 437      rfc2047_decode_word (d, p, dlen);
 438 @@ -766,7 +860,7 @@
 439  {
 440    while (a)
 441    {
 442 -    if (a->personal && strstr (a->personal, "=?") != NULL)
 443 +    if (a->personal)
 444        rfc2047_decode (&a->personal);
 445  #ifdef EXACT_ADDRESS
 446      if (a->val && strstr (a->val, "=?") != NULL)
 447 diff -uNr mutt-1.5.6.orig/rfc2231.c mutt-1.5.6/rfc2231.c
 448 --- mutt-1.5.6.orig/rfc2231.c   Wed Nov  5 18:41:33 2003
 449 +++ mutt-1.5.6/rfc2231.c        Sun Feb 15 15:06:19 2004
 450 @@ -113,6 +113,11 @@
 451
 452        if (option (OPTRFC2047PARAMS) && p->value && strstr (p->value, "=?"))
 453         rfc2047_decode (&p->value);
 454 +      else if (!option (OPTSTRICTMIME))
 455 +      {
 456 +        if (ascii_strcasecmp (AssumedCharset, "us-ascii"))
 457 +          mutt_convert_nonmime_string (&p->value);
 458 +      }
 459
 460        *last = p;
 461        last = &p->next;
 462 diff -uNr mutt-1.5.6.orig/sendlib.c mutt-1.5.6/sendlib.c
 463 --- mutt-1.5.6.orig/sendlib.c   Wed Nov  5 18:41:33 2003
 464 +++ mutt-1.5.6/sendlib.c        Sun Feb 15 15:11:33 2004
 465 @@ -496,7 +496,7 @@
 466    }
 467
 468    if (a->type == TYPETEXT && (!a->noconv))
 469 -    fc = fgetconv_open (fpin, Charset,
 470 +    fc = fgetconv_open (fpin, a->file_charset,
 471                         mutt_get_body_charset (send_charset, sizeof (send_charset), a),
 472                         0);
 473    else
 474 @@ -896,6 +896,7 @@
 475    CONTENT *info;
 476    CONTENT_STATE state;
 477    FILE *fp = NULL;
 478 +  char *fromcode;
 479    char *tocode;
 480    char buffer[100];
 481    char chsbuf[STRING];
 482 @@ -930,15 +931,18 @@
 483    if (b != NULL && b->type == TYPETEXT && (!b->noconv && !b->force_charset))
 484    {
 485      char *chs = mutt_get_parameter ("charset", b->parameter);
 486 +    char *fchs = b->use_disp ? ((FileCharset && *FileCharset) ?
 487 +                                FileCharset : Charset) : Charset;
 488      if (Charset && (chs || SendCharset) &&
 489 -       convert_file_from_to (fp, Charset, chs ? chs : SendCharset,
 490 -                             0, &tocode, info) != (size_t)(-1))
 491 +        convert_file_from_to (fp, fchs, chs ? chs : SendCharset,
 492 +                              &fromcode, &tocode, info) != (size_t)(-1))
 493      {
 494        if (!chs)
 495        {
 496         mutt_canonical_charset (chsbuf, sizeof (chsbuf), tocode);
 497         mutt_set_parameter ("charset", chsbuf, &b->parameter);
 498        }
 499 +      b->file_charset = fromcode;
 500        FREE (&tocode);
 501        safe_fclose (&fp);
 502        return info;
 503 @@ -1318,6 +1322,7 @@
 504    body->unlink = 1;
 505    body->use_disp = 0;
 506    body->disposition = DISPINLINE;
 507 +  body->noconv = 1;
 508
 509    mutt_parse_mime_message (ctx, hdr);
 510
 511 diff -uNr mutt-1.5.6.orig/PATCHES mutt-1.5.6/PATCHES
 512 --- mutt-1.5.6.orig/PATCHES     Mon Feb  2 02:42:47 2004
 513 +++ mutt-1.5.6/PATCHES  Sun Feb 15 15:14:55 2004
 514 @@ -0,0 +1 @@
 515 +patch-1.5.6.tt.assumed_charset.1