From: Paul Eggert <eggert@cs.ucla.edu>
To: 9428@debbugs.gnu.org
Subject: bug#9428: Emacs mishandles file offsets > 2**29 bytes
Date: Fri, 02 Sep 2011 22:29:36 -0700 [thread overview]
Message-ID: <4E61BB40.7010605@cs.ucla.edu> (raw)
Package: Emacs
Version: 24.0.50
Tags: patch
This patch fixes some integer-overflow issues with large files, mostly
on 32-bit hosts. I plan to test it a bit more before installing.
=== modified file 'src/ChangeLog'
--- src/ChangeLog 2011-08-30 22:43:43 +0000
+++ src/ChangeLog 2011-09-03 05:23:17 +0000
@@ -1,3 +1,22 @@
+2011-09-03 Paul Eggert <eggert@cs.ucla.edu>
+
+ * fileio.c: Fix bugs with large file offsets.
+ The previous code assumed that file offsets (off_t values) fit in
+ EMACS_INT variables, which is not true on typical 32-bit hosts.
+ The code messed up by falsely reporting buffer overflow in cases
+ such as (insert-file-contents "big" nil 1 2) into an empty buffer
+ when "big" contains more than 2**29 bytes, even though this
+ inserts just one byte and does not overflow the buffer.
+ (Finsert_file_contents): Store file offsets as off_t
+ values, not as EMACS_INT values. Check for overflow when
+ converting between EMACS_INT and off_t. When checking for
+ buffer overflow or for overlap, take the offsets into account.
+ Don't use EMACS_INT for small values where int suffices.
+ When checking for overlap, fix a typo: ZV was used where
+ ZV_BYTE was intended.
+ (Fwrite_region): Don't assume off_t fits into 'long'.
+ * buffer.h (struct buffer.modtime_size): Now off_t, not EMACS_INT.
+
2011-08-30 Chong Yidong <cyd@stupidchicken.com>
* syntax.c (find_defun_start): Update all cache variables if
=== modified file 'src/buffer.h'
--- src/buffer.h 2011-07-06 21:53:56 +0000
+++ src/buffer.h 2011-09-03 05:23:17 +0000
@@ -559,7 +559,7 @@
is still the same (since it's rounded up to seconds) but we're actually
not up-to-date. -1 means the size is unknown. Only meaningful if
modtime is actually set. */
- EMACS_INT modtime_size;
+ off_t modtime_size;
/* The value of text->modiff at the last auto-save. */
int auto_save_modified;
/* The value of text->modiff at the last display error.
=== modified file 'src/fileio.c'
--- src/fileio.c 2011-07-19 20:37:27 +0000
+++ src/fileio.c 2011-09-03 05:23:17 +0000
@@ -3179,6 +3179,7 @@
EMACS_INT inserted = 0;
int nochange = 0;
register EMACS_INT how_much;
+ off_t beg_offset, end_offset;
register EMACS_INT unprocessed;
int count = SPECPDL_INDEX ();
struct gcpro gcpro1, gcpro2, gcpro3, gcpro4, gcpro5;
@@ -3284,15 +3285,6 @@
record_unwind_protect (close_file_unwind, make_number (fd));
- /* Check whether the size is too large or negative, which can happen on a
- platform that allows file sizes greater than the maximum off_t value. */
- if (! not_regular
- && ! (0 <= st.st_size && st.st_size <= BUF_BYTES_MAX))
- buffer_overflow ();
-
- /* Prevent redisplay optimizations. */
- current_buffer->clip_changed = 1;
-
if (!NILP (visit))
{
if (!NILP (beg) || !NILP (end))
@@ -3302,25 +3294,63 @@
}
if (!NILP (beg))
- CHECK_NUMBER (beg);
+ {
+ if (! (RANGED_INTEGERP (0, beg, TYPE_MAXIMUM (off_t))))
+ wrong_type_argument (intern ("file-offset"), beg);
+ beg_offset = XFASTINT (beg);
+ }
else
- XSETFASTINT (beg, 0);
+ beg_offset = 0;
if (!NILP (end))
- CHECK_NUMBER (end);
+ {
+ if (! (RANGED_INTEGERP (0, end, TYPE_MAXIMUM (off_t))))
+ wrong_type_argument (intern ("file-offset"), end);
+ end_offset = XFASTINT (end);
+ }
else
{
- if (! not_regular)
+ if (not_regular)
+ end_offset = TYPE_MAXIMUM (off_t);
+ else
{
- XSETINT (end, st.st_size);
+ end_offset = st.st_size;
+
+ /* A negative size can happen on a platform that allows file
+ sizes greater than the maximum off_t value. */
+ if (end_offset < 0)
+ buffer_overflow ();
/* The file size returned from stat may be zero, but data
may be readable nonetheless, for example when this is a
file in the /proc filesystem. */
- if (st.st_size == 0)
- XSETINT (end, READ_BUF_SIZE);
- }
- }
+ if (end_offset == 0)
+ end_offset = READ_BUF_SIZE;
+ }
+ }
+
+ /* Check now whether the buffer will become too large,
+ in the likely case where the file's length is not changing.
+ This saves a lot of needless work before a buffer overflow. */
+ if (! not_regular)
+ {
+ /* The likely offset where we will stop reading. We could read
+ more (or less), if the file grows (or shrinks) as we read it. */
+ off_t likely_end = min (end_offset, st.st_size);
+
+ if (beg_offset < likely_end)
+ {
+ ptrdiff_t buf_bytes =
+ Z_BYTE - (!NILP (replace) ? ZV_BYTE - BEGV_BYTE : 0);
+ ptrdiff_t buf_growth_max = BUF_BYTES_MAX - buf_bytes;
+ off_t likely_growth = likely_end - beg_offset;
+ if (buf_growth_max < likely_growth)
+ buffer_overflow ();
+ }
+ }
+
+ /* Prevent redisplay optimizations. */
+ current_buffer->clip_changed = 1;
if (EQ (Vcoding_system_for_read, Qauto_save_coding))
{
@@ -3465,9 +3495,9 @@
give up on handling REPLACE in the optimized way. */
int giveup_match_end = 0;
- if (XINT (beg) != 0)
+ if (beg_offset != 0)
{
- if (emacs_lseek (fd, XINT (beg), SEEK_SET) < 0)
+ if (lseek (fd, beg_offset, SEEK_SET) < 0)
report_file_error ("Setting file position",
Fcons (orig_filename, Qnil));
}
@@ -3515,7 +3545,7 @@
immediate_quit = 0;
/* If the file matches the buffer completely,
there's no need to replace anything. */
- if (same_at_start - BEGV_BYTE == XINT (end))
+ if (same_at_start - BEGV_BYTE == end_offset)
{
emacs_close (fd);
specpdl_ptr--;
@@ -3530,16 +3560,17 @@
already found that decoding is necessary, don't waste time. */
while (!giveup_match_end)
{
- EMACS_INT total_read, nread, bufpos, curpos, trial;
+ int total_read, nread, bufpos, trial;
+ off_t curpos;
/* At what file position are we now scanning? */
- curpos = XINT (end) - (ZV_BYTE - same_at_end);
+ curpos = end_offset - (ZV_BYTE - same_at_end);
/* If the entire file matches the buffer tail, stop the scan. */
if (curpos == 0)
break;
/* How much can we scan in the next step? */
trial = min (curpos, sizeof buffer);
- if (emacs_lseek (fd, curpos - trial, SEEK_SET) < 0)
+ if (lseek (fd, curpos - trial, SEEK_SET) < 0)
report_file_error ("Setting file position",
Fcons (orig_filename, Qnil));
@@ -3606,13 +3637,14 @@
/* Don't try to reuse the same piece of text twice. */
overlap = (same_at_start - BEGV_BYTE
- - (same_at_end + st.st_size - ZV));
+ - (same_at_end
+ + (! NILP (end) ? end_offset : st.st_size) - ZV_BYTE));
if (overlap > 0)
same_at_end += overlap;
/* Arrange to read only the nonmatching middle part of the file. */
- XSETFASTINT (beg, XINT (beg) + (same_at_start - BEGV_BYTE));
- XSETFASTINT (end, XINT (end) - (ZV_BYTE - same_at_end));
+ beg_offset += same_at_start - BEGV_BYTE;
+ end_offset -= ZV_BYTE - same_at_end;
del_range_byte (same_at_start, same_at_end, 0);
/* Insert from the file at the proper position. */
@@ -3657,7 +3689,7 @@
/* First read the whole file, performing code conversion into
CONVERSION_BUFFER. */
- if (emacs_lseek (fd, XINT (beg), SEEK_SET) < 0)
+ if (lseek (fd, beg_offset, SEEK_SET) < 0)
report_file_error ("Setting file position",
Fcons (orig_filename, Qnil));
@@ -3824,7 +3856,7 @@
}
if (! not_regular)
- total = XINT (end) - XINT (beg);
+ total = end_offset - beg_offset;
else
/* For a special file, all we can do is guess. */
total = READ_BUF_SIZE;
@@ -3845,9 +3877,9 @@
if (GAP_SIZE < total)
make_gap (total - GAP_SIZE);
- if (XINT (beg) != 0 || !NILP (replace))
+ if (beg_offset != 0 || !NILP (replace))
{
- if (emacs_lseek (fd, XINT (beg), SEEK_SET) < 0)
+ if (lseek (fd, beg_offset, SEEK_SET) < 0)
report_file_error ("Setting file position",
Fcons (orig_filename, Qnil));
}
@@ -4576,7 +4608,7 @@
if (!NILP (append) && !NILP (Ffile_regular_p (filename)))
{
- long ret;
+ off_t ret;
if (NUMBERP (append))
ret = emacs_lseek (desc, XINT (append), SEEK_CUR);
next reply other threads:[~2011-09-03 5:29 UTC|newest]
Thread overview: 2+ messages / expand[flat|nested] mbox.gz Atom feed top
2011-09-03 5:29 Paul Eggert [this message]
2011-09-06 15:40 ` bug#9428: fix committed to trunk Paul Eggert
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=4E61BB40.7010605@cs.ucla.edu \
--to=eggert@cs.ucla.edu \
--cc=9428@debbugs.gnu.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
Code repositories for project(s) associated with this external index
https://git.savannah.gnu.org/cgit/emacs.git
https://git.savannah.gnu.org/cgit/emacs/org-mode.git
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.