unofficial mirror of emacs-devel@gnu.org 
 help / color / mirror / code / Atom feed
* Adjust point to move it off zero-width characters
@ 2011-08-06 14:47 Eli Zaretskii
  2011-08-07 16:13 ` Stefan Monnier
  0 siblings, 1 reply; 8+ messages in thread
From: Eli Zaretskii @ 2011-08-06 14:47 UTC (permalink / raw)
  To: emacs-devel

Is the patch below a good idea?  If so, is it okay to install it on
the trunk?  It will allow us to set the glyphless-char-display of LRM,
RLM, and the other directional control characters to zero-width and
thus make them invisible without having the cursor get "stuck" on
them and without using invisible or intangible text properties.

=== modified file 'src/dispextern.h'
--- src/dispextern.h	2011-08-05 11:04:44 +0000
+++ src/dispextern.h	2011-08-06 14:39:22 +0000
@@ -3098,6 +3098,9 @@ extern int cursor_in_mouse_face_p (struc
 extern void tty_draw_row_with_mouse_face (struct window *, struct glyph_row *,
 					  int, int, enum draw_glyphs_face);
 
+extern Lisp_Object Qzero_width;
+extern Lisp_Object glyphless_char_display_method (int, struct frame *);
+
 /* Flags passed to try_window.  */
 #define TRY_WINDOW_CHECK_MARGINS	(1 << 0)
 #define TRY_WINDOW_IGNORE_FONTS_CHANGE	(1 << 1)
@@ -3113,6 +3116,7 @@ void compute_fringe_widths (struct frame
 void w32_init_fringe (struct redisplay_interface *);
 void w32_reset_fringes (void);
 #endif
+
 /* Defined in image.c */
 
 #ifdef HAVE_WINDOW_SYSTEM

=== modified file 'src/keyboard.c'
--- src/keyboard.c	2011-08-04 17:04:39 +0000
+++ src/keyboard.c	2011-08-06 14:39:31 +0000
@@ -1721,11 +1721,13 @@ adjust_point_for_property (EMACS_INT las
      user can keep inserting another character at point or keep
      deleting characters around point.  */
   int check_composition = ! modified, check_display = 1, check_invisible = 1;
+  int check_glyphless = !modified;
   EMACS_INT orig_pt = PT;
 
   /* FIXME: cycling is probably not necessary because these properties
      can't be usefully combined anyway.  */
-  while (check_composition || check_display || check_invisible)
+  while (check_composition || check_display || check_invisible
+	 || check_glyphless)
     {
       /* FIXME: check `intangible'.  */
       if (check_composition
@@ -1733,7 +1735,7 @@ adjust_point_for_property (EMACS_INT las
 	  && (beg = composition_adjust_point (last_pt, PT)) != PT)
 	{
 	  SET_PT (beg);
-	  check_display = check_invisible = 1;
+	  check_display = check_invisible = check_glyphless = 1;
 	}
       check_composition = 0;
       if (check_display
@@ -1752,7 +1754,7 @@ adjust_point_for_property (EMACS_INT las
 	  SET_PT (PT < last_pt
 		  ? (STRINGP (val) && SCHARS (val) == 0 ? beg - 1 : beg)
 		  : end);
-	  check_composition = check_invisible = 1;
+	  check_composition = check_invisible = check_glyphless = 1;
 	}
       check_display = 0;
       if (check_invisible && PT > BEGV && PT < ZV)
@@ -1823,7 +1825,7 @@ adjust_point_for_property (EMACS_INT las
 			 was already in the range: we don't get to choose
 			 which end of the range we have to go to.  */
 		      : (PT < last_pt ? beg : end));
-	      check_composition = check_display = 1;
+	      check_composition = check_display = check_glyphless = 1;
 	    }
 #if 0 /* This assertion isn't correct, because SET_PT may end up setting
 	 the point to something other than its argument, due to
@@ -1836,9 +1838,11 @@ adjust_point_for_property (EMACS_INT las
 	  if (!modified && !ellipsis && beg < end)
 	    {
 	      if (last_pt == beg && PT == end && end < ZV)
-		(check_composition = check_display = 1, SET_PT (end + 1));
+		(check_composition = check_display = check_glyphless = 1,
+		 SET_PT (end + 1));
 	      else if (last_pt == end && PT == beg && beg > BEGV)
-		(check_composition = check_display = 1, SET_PT (beg - 1));
+		(check_composition = check_display = check_glyphless = 1,
+		 SET_PT (beg - 1));
 	      else if (PT == ((PT < last_pt) ? beg : end))
 		/* We've already moved as far as we can.  Trying to go
 		   to the other end would mean moving backwards and thus
@@ -1851,11 +1855,30 @@ adjust_point_for_property (EMACS_INT las
 			   (make_number (PT == beg ? end : beg),
 			    Qinvisible, Qnil),
 			   !TEXT_PROP_MEANS_INVISIBLE (val)))
-		(check_composition = check_display = 1,
+		(check_composition = check_display = check_glyphless = 1,
 		 SET_PT (PT == beg ? end : beg));
 	    }
 	}
       check_invisible = 0;
+      if (check_glyphless)
+	{
+	  Lisp_Object glyphless_method;
+
+	  check_glyphless = 0;
+	  if (PT >= BEGV && PT < ZV)
+	    {
+	      glyphless_method =
+		glyphless_char_display_method (FETCH_CHAR (PT_BYTE),
+					       XFRAME (selected_frame));
+
+	      if (EQ (glyphless_method, Qzero_width))
+		{
+		  SET_PT (PT > BEGV && PT < last_pt ? PT - 1 : PT + 1);
+		  check_glyphless = 1;
+		  check_composition = check_display = check_invisible = 1;
+		}
+	    }
+	}
     }
 }
 

=== modified file 'src/xdisp.c'
--- src/xdisp.c	2011-08-06 11:49:35 +0000
+++ src/xdisp.c	2011-08-06 14:00:43 +0000
@@ -770,7 +770,8 @@ Lisp_Object Qglyphless_char;
 static Lisp_Object Qglyphless_char_display;
 
 /* Method symbols for Vglyphless_char_display.  */
-static Lisp_Object Qhex_code, Qempty_box, Qthin_space, Qzero_width;
+static Lisp_Object Qhex_code, Qempty_box, Qthin_space;
+Lisp_Object Qzero_width;
 
 /* Default pixel width of `thin-space' display method.  */
 #define THIN_SPACE_WIDTH 1
@@ -6019,16 +6020,11 @@ static int (* get_next_element[NUM_IT_ME
 				 FACE_FROM_ID ((IT)->f, (IT)->face_id),	\
 				 (IT)->string)))
 
-
-/* Lookup the char-table Vglyphless_char_display for character C (-1
-   if we want information for no-font case), and return the display
-   method symbol.  By side-effect, update it->what and
-   it->glyphless_method.  This function is called from
-   get_next_display_element for each character element, and from
-   x_produce_glyphs when no suitable font was found.  */
-
+/* A subroutine of lookup_glyphless_char_display.  Also called from
+   keyboard.c, to adjust point position when it is on a glyphless
+   character whose display uses the zero-width method.  */
 Lisp_Object
-lookup_glyphless_char_display (int c, struct it *it)
+glyphless_char_display_method (int c, struct frame *f)
 {
   Lisp_Object glyphless_method = Qnil;
 
@@ -6039,13 +6035,27 @@ lookup_glyphless_char_display (int c, st
 	{
 	  glyphless_method = CHAR_TABLE_REF (Vglyphless_char_display, c);
 	  if (CONSP (glyphless_method))
-	    glyphless_method = FRAME_WINDOW_P (it->f)
+	    glyphless_method = FRAME_WINDOW_P (f)
 	      ? XCAR (glyphless_method)
 	      : XCDR (glyphless_method);
 	}
       else
 	glyphless_method = XCHAR_TABLE (Vglyphless_char_display)->extras[0];
     }
+  return glyphless_method;
+}
+
+/* Lookup the char-table Vglyphless_char_display for character C (-1
+   if we want information for no-font case), and return the display
+   method symbol.  By side-effect, update it->what and
+   it->glyphless_method.  This function is called from
+   get_next_display_element for each character element, and from
+   x_produce_glyphs when no suitable font was found.  */
+
+Lisp_Object
+lookup_glyphless_char_display (int c, struct it *it)
+{
+  Lisp_Object glyphless_method = glyphless_char_display_method (c, it->f);
 
  retry:
   if (NILP (glyphless_method))




^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: Adjust point to move it off zero-width characters
  2011-08-06 14:47 Adjust point to move it off zero-width characters Eli Zaretskii
@ 2011-08-07 16:13 ` Stefan Monnier
  2011-08-07 16:39   ` Eli Zaretskii
  0 siblings, 1 reply; 8+ messages in thread
From: Stefan Monnier @ 2011-08-07 16:13 UTC (permalink / raw)
  To: Eli Zaretskii; +Cc: emacs-devel

> Is the patch below a good idea?  If so, is it okay to install it on
> the trunk?  It will allow us to set the glyphless-char-display of LRM,
> RLM, and the other directional control characters to zero-width and
> thus make them invisible without having the cursor get "stuck" on
> them and without using invisible or intangible text properties.

Looks OK to me, tho I have a question:

> +      if (check_glyphless)
> +	{
> +	  Lisp_Object glyphless_method;
> +
> +	  check_glyphless = 0;
> +	  if (PT >= BEGV && PT < ZV)
> +	    {
> +	      glyphless_method =
> +		glyphless_char_display_method (FETCH_CHAR (PT_BYTE),
> +					       XFRAME (selected_frame));

Please merge the declaration into the initialization.

> +	      if (EQ (glyphless_method, Qzero_width))
> +		{
> +		  SET_PT (PT > BEGV && PT < last_pt ? PT - 1 : PT + 1);
> +		  check_glyphless = 1;
> +		  check_composition = check_display = check_invisible = 1;
> +		}

This means that point is placed either after the LRM or before the char
preceding the LRM, depending only on the direction of the movement of
the last command.  I.e. the LRM gets "fused" with the preceding char.

Is there a reason to fuse it with the preceding char rather than fusing
it with the next char?


        Stefan



^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: Adjust point to move it off zero-width characters
  2011-08-07 16:13 ` Stefan Monnier
@ 2011-08-07 16:39   ` Eli Zaretskii
  2011-08-07 16:46     ` Eli Zaretskii
  2011-08-08  0:50     ` Stefan Monnier
  0 siblings, 2 replies; 8+ messages in thread
From: Eli Zaretskii @ 2011-08-07 16:39 UTC (permalink / raw)
  To: Stefan Monnier; +Cc: emacs-devel

> From: Stefan Monnier <monnier@iro.umontreal.ca>
> Cc: emacs-devel@gnu.org
> Date: Sun, 07 Aug 2011 12:13:10 -0400
> 
> > +	      if (EQ (glyphless_method, Qzero_width))
> > +		{
> > +		  SET_PT (PT > BEGV && PT < last_pt ? PT - 1 : PT + 1);
> > +		  check_glyphless = 1;
> > +		  check_composition = check_display = check_invisible = 1;
> > +		}
> 
> This means that point is placed either after the LRM or before the char
> preceding the LRM, depending only on the direction of the movement of
> the last command.  I.e. the LRM gets "fused" with the preceding char.
> 
> Is there a reason to fuse it with the preceding char rather than fusing
> it with the next char?

I think you are right.  I will rework the patch to do that.



^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: Adjust point to move it off zero-width characters
  2011-08-07 16:39   ` Eli Zaretskii
@ 2011-08-07 16:46     ` Eli Zaretskii
  2011-08-07 17:10       ` Eli Zaretskii
  2011-08-08  0:50     ` Stefan Monnier
  1 sibling, 1 reply; 8+ messages in thread
From: Eli Zaretskii @ 2011-08-07 16:46 UTC (permalink / raw)
  To: monnier; +Cc: emacs-devel

> Date: Sun, 07 Aug 2011 19:39:14 +0300
> From: Eli Zaretskii <eliz@gnu.org>
> Cc: emacs-devel@gnu.org
> 
> > From: Stefan Monnier <monnier@iro.umontreal.ca>
> > Cc: emacs-devel@gnu.org
> > Date: Sun, 07 Aug 2011 12:13:10 -0400
> > 
> > > +	      if (EQ (glyphless_method, Qzero_width))
> > > +		{
> > > +		  SET_PT (PT > BEGV && PT < last_pt ? PT - 1 : PT + 1);
> > > +		  check_glyphless = 1;
> > > +		  check_composition = check_display = check_invisible = 1;
> > > +		}
> > 
> > This means that point is placed either after the LRM or before the char
> > preceding the LRM, depending only on the direction of the movement of
> > the last command.  I.e. the LRM gets "fused" with the preceding char.
> > 
> > Is there a reason to fuse it with the preceding char rather than fusing
> > it with the next char?
> 
> I think you are right.  I will rework the patch to do that.

Actually, there could be a problem here: "C-x =" will tell point is on
an LRM, which will confuse users, because they see the cursor on the
following character.

Do we need to change "C-x =" as well?



^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: Adjust point to move it off zero-width characters
  2011-08-07 16:46     ` Eli Zaretskii
@ 2011-08-07 17:10       ` Eli Zaretskii
  0 siblings, 0 replies; 8+ messages in thread
From: Eli Zaretskii @ 2011-08-07 17:10 UTC (permalink / raw)
  To: monnier; +Cc: emacs-devel

> Date: Sun, 07 Aug 2011 19:46:42 +0300
> From: Eli Zaretskii <eliz@gnu.org>
> Cc: emacs-devel@gnu.org
> 
> > > Is there a reason to fuse it with the preceding char rather than fusing
> > > it with the next char?
> > 
> > I think you are right.  I will rework the patch to do that.
> 
> Actually, there could be a problem here: "C-x =" will tell point is on
> an LRM, which will confuse users, because they see the cursor on the
> following character.

There's one other complication: to _really_ DTRT, there's one
directional control that needs to be fused with the preceding
character: the PDF, u+202c (POP DIRECTIONAL FORMATTING).  That's
because copy/paste of a portion of text enclosed in, say, LRO...PDF
should keep the visual appearance of that portion of text, and for
that you need to copy it with LRO before and PDF after.

But hard-coding the special treatment of PDF on the C level sounds too
kludgey, no?



^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: Adjust point to move it off zero-width characters
  2011-08-07 16:39   ` Eli Zaretskii
  2011-08-07 16:46     ` Eli Zaretskii
@ 2011-08-08  0:50     ` Stefan Monnier
  2011-08-08  3:02       ` Eli Zaretskii
  1 sibling, 1 reply; 8+ messages in thread
From: Stefan Monnier @ 2011-08-08  0:50 UTC (permalink / raw)
  To: Eli Zaretskii; +Cc: emacs-devel

>> > +	      if (EQ (glyphless_method, Qzero_width))
>> > +		{
>> > +		  SET_PT (PT > BEGV && PT < last_pt ? PT - 1 : PT + 1);
>> > +		  check_glyphless = 1;
>> > +		  check_composition = check_display = check_invisible = 1;
>> > +		}
>> This means that point is placed either after the LRM or before the char
>> preceding the LRM, depending only on the direction of the movement of
>> the last command.  I.e. the LRM gets "fused" with the preceding char.
>> Is there a reason to fuse it with the preceding char rather than fusing
>> it with the next char?
> I think you are right.  I will rework the patch to do that.

So you're saying that it should always be fused with the next char?
Then my question becomes:
 Is there a reason to fuse it with the next char rather than fusing
 it with the preceding char? ;-)
The answer needs to be in the source code's comments.
 
> Actually, there could be a problem here: "C-x =" will tell point is on
> an LRM, which will confuse users, because they see the cursor on the
> following character.

Yes, that's a general problem with point adjustments.

> Do we need to change "C-x =" as well?

Yup.  C-x h already handles compositions, but it should be extended to
show invisible text (and zero-width glyphs) immediately before and
after point.  This is not directly related to your suggestion, tho.

> There's one other complication: to _really_ DTRT, there's one
> directional control that needs to be fused with the preceding
> character: the PDF, u+202c (POP DIRECTIONAL FORMATTING).  That's

That was my impression as well: the fusing should not always be done in
the same direction.
I'd still like to hear your reasoning about why it only depends on the
particular zero-width char and not on the surrounding context.

> But hard-coding the special treatment of PDF on the C level sounds too
> kludgey, no?

Without knowing why you think LRM needs to be fused one way and PDF in
the other way, it's hard to know for sure whether hard-coding them is
a kludge, but yes: it doe sound likely to be so.


        Stefan



^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: Adjust point to move it off zero-width characters
  2011-08-08  0:50     ` Stefan Monnier
@ 2011-08-08  3:02       ` Eli Zaretskii
  2011-08-08  4:12         ` Stefan Monnier
  0 siblings, 1 reply; 8+ messages in thread
From: Eli Zaretskii @ 2011-08-08  3:02 UTC (permalink / raw)
  To: Stefan Monnier; +Cc: emacs-devel

> From: Stefan Monnier <monnier@iro.umontreal.ca>
> Cc: emacs-devel@gnu.org
> Date: Sun, 07 Aug 2011 20:50:23 -0400
> 
> So you're saying that it should always be fused with the next char?
> Then my question becomes:
>  Is there a reason to fuse it with the next char rather than fusing
>  it with the preceding char? ;-)

There is a good reason for the directional control characters, see
below.  For others, I think it's immaterial.

> > There's one other complication: to _really_ DTRT, there's one
> > directional control that needs to be fused with the preceding
> > character: the PDF, u+202c (POP DIRECTIONAL FORMATTING).  That's
> 
> That was my impression as well: the fusing should not always be done in
> the same direction.
> I'd still like to hear your reasoning about why it only depends on the
> particular zero-width char and not on the surrounding context.

I explained that, for the directional control characters: the need to
be able to interactively copy/paste text and be sure it displays the
same.  The directional controls affect the _following_ characters.  If
we "fuse" LRM/RLM/LRO/RLO/LRE/RLE with the previous character, they
can only be copy/pasted with the text that precedes them, where they
have no effect whatsoever.  That's why PDF is different: it affects
the characters that precede it.

> > But hard-coding the special treatment of PDF on the C level sounds too
> > kludgey, no?
> 
> Without knowing why you think LRM needs to be fused one way and PDF in
> the other way, it's hard to know for sure whether hard-coding them is
> a kludge, but yes: it doe sound likely to be so.

So what are the alternatives?  Another char-table?



^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: Adjust point to move it off zero-width characters
  2011-08-08  3:02       ` Eli Zaretskii
@ 2011-08-08  4:12         ` Stefan Monnier
  0 siblings, 0 replies; 8+ messages in thread
From: Stefan Monnier @ 2011-08-08  4:12 UTC (permalink / raw)
  To: Eli Zaretskii; +Cc: emacs-devel

>> That was my impression as well: the fusing should not always be done
>> in the same direction.  I'd still like to hear your reasoning about
>> why it only depends on the particular zero-width char and not on the
>> surrounding context.
> I explained that, for the directional control characters: the need to
> be able to interactively copy/paste text and be sure it displays the
> same.  The directional controls affect the _following_ characters.  If
> we "fuse" LRM/RLM/LRO/RLO/LRE/RLE with the previous character, they
> can only be copy/pasted with the text that precedes them, where they
> have no effect whatsoever.  That's why PDF is different: it affects
> the characters that precede it.

Ah, so they're kind of like combining characters.  Good.
In that case I think hardcoding PDF in the code is perfectly acceptable,
with a commend explaining what's going on.


        Stefan



^ permalink raw reply	[flat|nested] 8+ messages in thread

end of thread, other threads:[~2011-08-08  4:12 UTC | newest]

Thread overview: 8+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2011-08-06 14:47 Adjust point to move it off zero-width characters Eli Zaretskii
2011-08-07 16:13 ` Stefan Monnier
2011-08-07 16:39   ` Eli Zaretskii
2011-08-07 16:46     ` Eli Zaretskii
2011-08-07 17:10       ` Eli Zaretskii
2011-08-08  0:50     ` Stefan Monnier
2011-08-08  3:02       ` Eli Zaretskii
2011-08-08  4:12         ` Stefan Monnier

Code repositories for project(s) associated with this public inbox

	https://git.savannah.gnu.org/cgit/emacs.git

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).