From: Dan Nicolaescu <dann@ics.uci.edu>
To: emacs-devel@gnu.org
Subject: Problem report #110: base/src/emacs/src/coding.c (decode_coding_utf_8); UNINIT
Date: Tue, 2 Dec 2008 14:18:20 -0800 (PST) [thread overview]
Message-ID: <200812022218.mB2MIKBl021746@mothra.ics.uci.edu> (raw)
CID: 110
Checker: UNINIT (help)
File: base/src/emacs/src/coding.c
Function: decode_coding_utf_8
Description: Using uninitialized value "consumed_chars_base"
Event var_decl: Declared variable "consumed_chars_base" without initializer
Also see events: [uninit_use]
1329 int consumed_chars = 0, consumed_chars_base;
1330 int multibytep = coding->src_multibyte;
1331 enum utf_bom_type bom = CODING_UTF_8_BOM (coding);
1332 Lisp_Object attr, charset_list;
At conditional (1): "((0), (((0), ((Vcoding_system_hash_table & -8)->key_and_value & -8))->contents[((2 * (coding)->id) + 1)] & -8))->contents[2] == Qdos" taking true path
1333 int eol_crlf = EQ (CODING_ID_EOL_TYPE (coding->id), Qdos);
1334 int byte_after_cr = -1;
1335
At conditional (2): "0" taking false path
1336 CODING_GET_INFO (coding, attr, charset_list);
1337
At conditional (3): "bom != 1" taking true path
1338 if (bom != utf_without_bom)
1339 {
1340 int c1, c2, c3;
1341
1342 src_base = src;
At conditional (4): "src == src_end" taking true path
At conditional (5): "src_base < src" taking true path
1343 ONE_MORE_BYTE (c1);
1344 if (! UTF_8_3_OCTET_LEADING_P (c1))
1345 src = src_base;
1346 else
1347 {
1348 ONE_MORE_BYTE (c2);
1349 if (! UTF_8_EXTRA_OCTET_P (c2))
1350 src = src_base;
1351 else
1352 {
1353 ONE_MORE_BYTE (c3);
1354 if (! UTF_8_EXTRA_OCTET_P (c3))
1355 src = src_base;
1356 else
1357 {
1358 if ((c1 != UTF_8_BOM_1)
1359 || (c2 != UTF_8_BOM_2) || (c3 != UTF_8_BOM_3))
1360 src = src_base;
1361 else
1362 CODING_UTF_8_BOM (coding) = utf_without_bom;
1363 }
1364 }
1365 }
1366 }
1367 CODING_UTF_8_BOM (coding) = utf_without_bom;
1368
1369
1370
1371 while (1)
1372 {
1373 int c, c1, c2, c3, c4, c5;
1374
1375 src_base = src;
1376 consumed_chars_base = consumed_chars;
1377
1378 if (charbuf >= charbuf_end)
1379 break;
1380
1381 if (byte_after_cr >= 0)
1382 c1 = byte_after_cr, byte_after_cr = -1;
1383 else
1384 ONE_MORE_BYTE (c1);
1385 if (c1 < 0)
1386 {
1387 c = - c1;
1388 }
1389 else if (UTF_8_1_OCTET_P(c1))
1390 {
1391 if (eol_crlf && c1 == '\r')
1392 ONE_MORE_BYTE (byte_after_cr);
1393 c = c1;
1394 }
1395 else
1396 {
1397 ONE_MORE_BYTE (c2);
1398 if (c2 < 0 || ! UTF_8_EXTRA_OCTET_P (c2))
1399 goto invalid_code;
1400 if (UTF_8_2_OCTET_LEADING_P (c1))
1401 {
1402 c = ((c1 & 0x1F) << 6) | (c2 & 0x3F);
1403 /* Reject overlong sequences here and below. Encoders
1404 producing them are incorrect, they can be misleading,
1405 and they mess up read/write invariance. */
1406 if (c < 128)
1407 goto invalid_code;
1408 }
1409 else
1410 {
1411 ONE_MORE_BYTE (c3);
1412 if (c3 < 0 || ! UTF_8_EXTRA_OCTET_P (c3))
1413 goto invalid_code;
1414 if (UTF_8_3_OCTET_LEADING_P (c1))
1415 {
1416 c = (((c1 & 0xF) << 12)
1417 | ((c2 & 0x3F) << 6) | (c3 & 0x3F));
1418 if (c < 0x800
1419 || (c >= 0xd800 && c < 0xe000)) /* surrogates (invalid) */
1420 goto invalid_code;
1421 }
1422 else
1423 {
1424 ONE_MORE_BYTE (c4);
1425 if (c4 < 0 || ! UTF_8_EXTRA_OCTET_P (c4))
1426 goto invalid_code;
1427 if (UTF_8_4_OCTET_LEADING_P (c1))
1428 {
1429 c = (((c1 & 0x7) << 18) | ((c2 & 0x3F) << 12)
1430 | ((c3 & 0x3F) << 6) | (c4 & 0x3F));
1431 if (c < 0x10000)
1432 goto invalid_code;
1433 }
1434 else
1435 {
1436 ONE_MORE_BYTE (c5);
1437 if (c5 < 0 || ! UTF_8_EXTRA_OCTET_P (c5))
1438 goto invalid_code;
1439 if (UTF_8_5_OCTET_LEADING_P (c1))
1440 {
1441 c = (((c1 & 0x3) << 24) | ((c2 & 0x3F) << 18)
1442 | ((c3 & 0x3F) << 12) | ((c4 & 0x3F) << 6)
1443 | (c5 & 0x3F));
1444 if ((c > MAX_CHAR) || (c < 0x200000))
1445 goto invalid_code;
1446 }
1447 else
1448 goto invalid_code;
1449 }
1450 }
1451 }
1452 }
1453
1454 *charbuf++ = c;
1455 continue;
1456
1457 invalid_code:
1458 src = src_base;
1459 consumed_chars = consumed_chars_base;
1460 ONE_MORE_BYTE (c);
1461 *charbuf++ = ASCII_BYTE_P (c) ? c : BYTE8_TO_CHAR (c);
1462 coding->errors++;
1463 }
1464
1465 no_more_source:
Event uninit_use: Using uninitialized value "consumed_chars_base"
Also see events: [var_decl]
1466 coding->consumed_char += consumed_chars_base;
1467 coding->consumed = src_base - coding->source;
1468 coding->charbuf_used = charbuf - coding->charbuf;
next reply | other threads: [~2008-12-02 22:18 UTC | newest]
Thread overview: 3+ messages / expand [flat | nested] | mbox.gz | Atom feed | top
2008-12-02 22:18 Dan Nicolaescu [this message]
2008-12-03 1:23 ` Problem report #110: base/src/emacs/src/coding.c (decode_coding_utf_8); UNINIT Kenichi Handa
2008-12-03 5:20 ` Kenichi Handa
Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

  List information: https://www.gnu.org/software/emacs/

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=200812022218.mB2MIKBl021746@mothra.ics.uci.edu \
    --to=dann@ics.uci.edu \
    --cc=emacs-devel@gnu.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

Be sure your reply has a Subject: header at the top and a blank line
before the message body.
Code repositories for project(s) associated with this public inbox:
https://git.savannah.gnu.org/cgit/emacs.git
This is a public inbox; see the mirroring instructions
for how to clone and mirror all data and code used for this inbox,
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).