unofficial mirror of help-gnu-emacs@gnu.org
 help / color / mirror / Atom feed
From: Michael Slass <miknrene@drizzle.com>
Subject: Re: character encoding
Date: Sat, 26 Oct 2002 19:13:04 GMT	[thread overview]
Message-ID: <m3lm4l6mco.fsf@localhost.localdomain> (raw)
In-Reply-To: 87eladhzo4.fsf@juliva.com

Ivan Kanis <ivank@juliva.com> writes:

>
>I know it's in C. If someone cares to turn this into lisp that'll be neat :)
>
>Ivan
>
>
>#include "stdio.h"
>
>char *table [] =  {
>    "euro",  /* 0x80 0x20AC  #EURO SIGN */
>    "",      /* 0x81          #UNDEFINED */
>    "\"",    /* 0x82  0x201A  #SINGLE LOW-9 QUOTATION MARK */
>    "f",     /* 0x83  0x0192  #LATIN SMALL LETTER F WITH HOOK */
>    "\"",    /* 0x84  0x201E  #DOUBLE LOW-9 QUOTATION MARK */
>    "...",   /* 0x85  0x2026  #HORIZONTAL ELLIPSIS */
>    "*",     /* 0x86  0x2020  #DAGGER */
>    "*",     /* 0x87  0x2021  #DOUBLE DAGGER */
>    "^",     /* 0x88  0x02C6  #MODIFIER LETTER CIRCUMFLEX ACCENT */
>    " 0/00", /* 0x89  0x2030  #PER MILLE SIGN */
>    "S",     /* 0x8A  0x0160  #LATIN CAPITAL LETTER S WITH CARON */
>    "<",     /* 0x8B  0x2039  #SINGLE LEFT-POINTING ANGLE QUOTATION MARK */
>    "OE",    /* 0x8C  0x0152  #LATIN CAPITAL LIGATURE OE */
>    "",      /* 0x8D          #UNDEFINED */
>    "Z",     /* 0x8E  0x017D  #LATIN CAPITAL LETTER Z WITH CARON */
>    "",      /* 0x8F          #UNDEFINED */
>    "",      /* 0x90          #UNDEFINED */
>    "'",     /* 0x91  0x2018  #LEFT SINGLE QUOTATION MARK */
>    "'",     /* 0x92  0x2019  #RIGHT SINGLE QUOTATION MARK */
>    "\"",    /* 0x93  0x201C  #LEFT DOUBLE QUOTATION MARK */
>    "\"",    /* 0x94  0x201D  #RIGHT DOUBLE QUOTATION MARK */
>    "*",     /* 0x95  0x2022  #BULLET */
>    "-",     /* 0x96  0x2013  #EN DASH */
>    "-",     /* 0x97  0x2014  #EM DASH */
>    "~",     /* 0x98  0x02DC  #SMALL TILDE */
>    "(TM)",  /* 0x99  0x2122  #TRADE MARK SIGN */
>    "s",     /* 0x9A  0x0161  #LATIN SMALL LETTER S WITH CARON */
>    "\"",    /* 0x9B  0x203A  #SINGLE RIGHT-POINTING ANGLE QUOTATION MARK */
>    "oe",    /* 0x9C  0x0153  #LATIN SMALL LIGATURE OE */
>    "",      /* 0x9D          #UNDEFINED */
>    "z",     /* 0x9E  0x017E  #LATIN SMALL LETTER Z WITH CARON */
>    "y"     /* 0x9F  0x0178  #LATIN CAPITAL LETTER Y WITH DIAERESIS */
>};
>
>
>int main (int argc, char **argv) {
>    FILE *fd;
>    unsigned char in;
>    
>    if (argc == 2) {
>        if ((fd = fopen(argv[1], "r"))) {
>            while (fread(&in, 1, sizeof(char), fd)) {
>                if (in >= 0x80 && in < 0xa0) {
>                    printf ("%s", table[in-0x80]);
>                } else {
>                    printf("%c", in);
>                }
>            }
>            fclose (fd);
>        }
>    }
>    return 0;
>}

Ivan:

I can't resist that challenge.  Here's a first cut, almost completely
untested, because I don't have any Outlook-born mail to test it on.

(defvar de-microsquish-translation-alist
   '(( ?\x80 . "euro" )
     ( ?\x81 . "")
     ( ?\x82 . "\"" )
     ( ?\x83 . "f" )
     ( ?\x84 . "\"" )
     ( ?\x85 . "..." )
     ( ?\x86 . "*" )
     ( ?\x87 . "*" )
     ( ?\x88 . "^" )
     ( ?\x89 . " 0/00" )
     ( ?\x8A . "S" )
     ( ?\x8B . "<" )
     ( ?\x8C . "OE" )
     ( ?\x8E . "Z" )
     ( ?\x8F . "" )
     ( ?\x90 . "" )
     ( ?\x91 . "'" )
     ( ?\x92 . "'" )
     ( ?\x93 . "" )
     ( ?\x94 . "" )
     ( ?\x95 . "*" )
     ( ?\x96 . "-" )
     ( ?\x97 . "-" )
     ( ?\x98 . "~" )
     ( ?\x99 . "(TM)" )
     ( ?\x9A . "s" )
     ( ?\x9B . "\"" )
     ( ?\x9C . "oe" )
     ( ?\x9D . "" )
     ( ?\x9E . "z" )
     ( ?\x9F . "y" ))
"Table of hex values and replacement strings for unprintable Micro$oft chars.
See also `de-microsquish-region'.")


(defun de-microsquish-region (beg end)
  "Translate Micro$oft characters according to `de-microsquish-translation-alist'"
  (interactive "r")
  (save-restriction
    (narrow-to-region beg end)
    (goto-char (point-min))
    (while (not (eobp))
      (let* ((char (char-after))
             (replacement-cell (assoc char de-microsquish-translation-alist))
             (replacement (and replacement-cell (cdr replacement-cell))))
        (if (not replacement)
            (forward-char 1)
          (delete-char 1)
          (insert replacement))))))


-- 
Mike Slass

  reply	other threads:[~2002-10-26 19:13 UTC|newest]

Thread overview: 5+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2002-10-20 19:01 character encoding Hugh Lawson
2002-10-21  7:36 ` Charles Muller
     [not found] ` <mailman.1035185597.6743.help-gnu-emacs@gnu.org>
2002-10-26 17:34   ` Ivan Kanis
2002-10-26 19:13     ` Michael Slass [this message]
2002-10-26 20:05       ` Michael Slass

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

  List information: https://www.gnu.org/software/emacs/

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=m3lm4l6mco.fsf@localhost.localdomain \
    --to=miknrene@drizzle.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).