From mboxrd@z Thu Jan 1 00:00:00 1970 Path: main.gmane.org!not-for-mail From: Michael Slass Newsgroups: gmane.emacs.help Subject: Re: character encoding Date: Sat, 26 Oct 2002 19:13:04 GMT Organization: AT&T Broadband Sender: help-gnu-emacs-admin@gnu.org Message-ID: References: <87smz13p05.fsf@localhost.localdomain> <87eladhzo4.fsf@juliva.com> NNTP-Posting-Host: main.gmane.org Mime-Version: 1.0 Content-Type: text/plain; charset=us-ascii X-Trace: main.gmane.org 1035660186 3838 80.91.224.249 (26 Oct 2002 19:23:06 GMT) X-Complaints-To: usenet@main.gmane.org NNTP-Posting-Date: Sat, 26 Oct 2002 19:23:06 +0000 (UTC) Return-path: Original-Received: from monty-python.gnu.org ([199.232.76.173]) by main.gmane.org with esmtp (Exim 3.35 #1 (Debian)) id 185WWU-0000z6-00 for ; Sat, 26 Oct 2002 21:23:02 +0200 Original-Received: from localhost ([127.0.0.1] helo=monty-python.gnu.org) by monty-python.gnu.org with esmtp (Exim 4.10) id 185WQ5-0005RA-00; Sat, 26 Oct 2002 15:16:25 -0400 Original-Path: shelby.stanford.edu!newsfeed.stanford.edu!logbridge.uoregon.edu!arclight.uoregon.edu!wn13feed!worldnet.att.net!204.127.198.204!attbi_feed4!attbi.com!sccrnsc02.POSTED!not-for-mail Original-Newsgroups: gnu.emacs.help User-Agent: Gnus/5.09 (Gnus v5.9.0) Emacs/21.2 Original-Lines: 124 Original-NNTP-Posting-Host: 12.228.27.239 Original-X-Complaints-To: abuse@attbi.com Original-X-Trace: sccrnsc02 1035659584 12.228.27.239 (Sat, 26 Oct 2002 19:13:04 GMT) Original-NNTP-Posting-Date: Sat, 26 Oct 2002 19:13:04 GMT Original-Xref: shelby.stanford.edu gnu.emacs.help:106425 Original-To: help-gnu-emacs@gnu.org Errors-To: help-gnu-emacs-admin@gnu.org X-BeenThere: help-gnu-emacs@gnu.org X-Mailman-Version: 2.0.11 Precedence: bulk List-Help: List-Post: List-Subscribe: , List-Id: Users list for the GNU Emacs text editor List-Unsubscribe: , List-Archive: Xref: main.gmane.org gmane.emacs.help:2975 X-Report-Spam: http://spam.gmane.org/gmane.emacs.help:2975 Ivan Kanis writes: > >I know it's in C. If someone cares to turn this into lisp that'll be neat :) > >Ivan > > >#include "stdio.h" > >char *table [] = { > "euro", /* 0x80 0x20AC #EURO SIGN */ > "", /* 0x81 #UNDEFINED */ > "\"", /* 0x82 0x201A #SINGLE LOW-9 QUOTATION MARK */ > "f", /* 0x83 0x0192 #LATIN SMALL LETTER F WITH HOOK */ > "\"", /* 0x84 0x201E #DOUBLE LOW-9 QUOTATION MARK */ > "...", /* 0x85 0x2026 #HORIZONTAL ELLIPSIS */ > "*", /* 0x86 0x2020 #DAGGER */ > "*", /* 0x87 0x2021 #DOUBLE DAGGER */ > "^", /* 0x88 0x02C6 #MODIFIER LETTER CIRCUMFLEX ACCENT */ > " 0/00", /* 0x89 0x2030 #PER MILLE SIGN */ > "S", /* 0x8A 0x0160 #LATIN CAPITAL LETTER S WITH CARON */ > "<", /* 0x8B 0x2039 #SINGLE LEFT-POINTING ANGLE QUOTATION MARK */ > "OE", /* 0x8C 0x0152 #LATIN CAPITAL LIGATURE OE */ > "", /* 0x8D #UNDEFINED */ > "Z", /* 0x8E 0x017D #LATIN CAPITAL LETTER Z WITH CARON */ > "", /* 0x8F #UNDEFINED */ > "", /* 0x90 #UNDEFINED */ > "'", /* 0x91 0x2018 #LEFT SINGLE QUOTATION MARK */ > "'", /* 0x92 0x2019 #RIGHT SINGLE QUOTATION MARK */ > "\"", /* 0x93 0x201C #LEFT DOUBLE QUOTATION MARK */ > "\"", /* 0x94 0x201D #RIGHT DOUBLE QUOTATION MARK */ > "*", /* 0x95 0x2022 #BULLET */ > "-", /* 0x96 0x2013 #EN DASH */ > "-", /* 0x97 0x2014 #EM DASH */ > "~", /* 0x98 0x02DC #SMALL TILDE */ > "(TM)", /* 0x99 0x2122 #TRADE MARK SIGN */ > "s", /* 0x9A 0x0161 #LATIN SMALL LETTER S WITH CARON */ > "\"", /* 0x9B 0x203A #SINGLE RIGHT-POINTING ANGLE QUOTATION MARK */ > "oe", /* 0x9C 0x0153 #LATIN SMALL LIGATURE OE */ > "", /* 0x9D #UNDEFINED */ > "z", /* 0x9E 0x017E #LATIN SMALL LETTER Z WITH CARON */ > "y" /* 0x9F 0x0178 #LATIN CAPITAL LETTER Y WITH DIAERESIS */ >}; > > >int main (int argc, char **argv) { > FILE *fd; > unsigned char in; > > if (argc == 2) { > if ((fd = fopen(argv[1], "r"))) { > while (fread(&in, 1, sizeof(char), fd)) { > if (in >= 0x80 && in < 0xa0) { > printf ("%s", table[in-0x80]); > } else { > printf("%c", in); > } > } > fclose (fd); > } > } > return 0; >} Ivan: I can't resist that challenge. Here's a first cut, almost completely untested, because I don't have any Outlook-born mail to test it on. (defvar de-microsquish-translation-alist '(( ?\x80 . "euro" ) ( ?\x81 . "") ( ?\x82 . "\"" ) ( ?\x83 . "f" ) ( ?\x84 . "\"" ) ( ?\x85 . "..." ) ( ?\x86 . "*" ) ( ?\x87 . "*" ) ( ?\x88 . "^" ) ( ?\x89 . " 0/00" ) ( ?\x8A . "S" ) ( ?\x8B . "<" ) ( ?\x8C . "OE" ) ( ?\x8E . "Z" ) ( ?\x8F . "" ) ( ?\x90 . "" ) ( ?\x91 . "'" ) ( ?\x92 . "'" ) ( ?\x93 . "" ) ( ?\x94 . "" ) ( ?\x95 . "*" ) ( ?\x96 . "-" ) ( ?\x97 . "-" ) ( ?\x98 . "~" ) ( ?\x99 . "(TM)" ) ( ?\x9A . "s" ) ( ?\x9B . "\"" ) ( ?\x9C . "oe" ) ( ?\x9D . "" ) ( ?\x9E . "z" ) ( ?\x9F . "y" )) "Table of hex values and replacement strings for unprintable Micro$oft chars. See also `de-microsquish-region'.") (defun de-microsquish-region (beg end) "Translate Micro$oft characters according to `de-microsquish-translation-alist'" (interactive "r") (save-restriction (narrow-to-region beg end) (goto-char (point-min)) (while (not (eobp)) (let* ((char (char-after)) (replacement-cell (assoc char de-microsquish-translation-alist)) (replacement (and replacement-cell (cdr replacement-cell)))) (if (not replacement) (forward-char 1) (delete-char 1) (insert replacement)))))) -- Mike Slass