* limitations of hard-coded field separator removed
@ 2012-02-23 13:25 Andreas Röhler
2012-03-27 21:44 ` Bastien
0 siblings, 1 reply; 2+ messages in thread
From: Andreas Röhler @ 2012-02-23 13:25 UTC (permalink / raw)
To: emacs-orgmode
[-- Attachment #1: Type: text/plain, Size: 465 bytes --]
Hi,
the attached
org-table-import.patch
removes the limitations of the hard-coded separator char(s).
`org-guess-separator' accepts and detects all chars as field separators.
It works on the assumption that the char looked for appears an equal
number of times in each row.
Also, a default value,
`org-table-import-default-separator', should make
guessing faster in some cases.
Best regards,
Andreas
--
http://launchpad.net/python-mode
http://launchpad.net/s-x-emacs-werkstatt/
[-- Attachment #2: org-table-import.patch --]
[-- Type: text/x-patch, Size: 7190 bytes --]
diff --git a/lisp/org-table.el b/lisp/org-table.el
index 39cddab..6ad572b 100644
--- a/lisp/org-table.el
+++ b/lisp/org-table.el
@@ -334,6 +334,11 @@ available parameters."
:group 'org-table-import-export
:type 'string)
+(defcustom org-table-import-default-separator "\t"
+ "`org-table-import' may specify that value, avoid guessing."
+ :group 'org-table-import-export
+ :type 'string)
+
(defconst org-table-auto-recalculate-regexp "^[ \t]*| *# *\\(|\\|$\\)"
"Detects a table line marked for automatic recalculation.")
(defconst org-table-recalculate-regexp "^[ \t]*| *[#*] *\\(|\\|$\\)"
@@ -474,71 +479,72 @@ SIZE is a string Columns x Rows like for example \"3x2\"."
(goto-char pos)))
(org-table-align)))
+(defun org-guess-separator ()
+ "Guess the separator char of a given table.
+
+Works based on the assumption, that char looked for appears in equal numbers at each row. "
+ (interactive)
+ (save-excursion
+ (let ((orig (point))
+ char erg matches done pos first second)
+ (beginning-of-line)
+ ;; look first for `org-table-import-default-separator'
+ (when (re-search-forward org-table-import-default-separator nil t 1)
+ (setq erg (org-guess-separator-intern)))
+ (unless erg
+ (goto-char orig)
+ (when (re-search-forward "[[:punct:][:blank:]]" nil t 1)
+ (setq erg (org-guess-separator-intern))))
+ ;; maybe neither default nor of character-class punct
+ (unless erg
+ (goto-char orig)
+
+ (setq erg (org-guess-separator-intern)))
+ (when (interactive-p) (if (string= "\t" erg)
+ (message "%s" "\\t")
+ (message "%s" erg)))
+ erg)))
+
+(defun org-guess-separator-intern ()
+ (let (erg)
+ (while (and (not (eolp)) (not done))
+ (setq pos (point))
+ (setq char (progn (or (looking-back ".")(looking-at ".")) (match-string-no-properties 0)))
+ (setq matches (count-matches char (line-beginning-position) (line-end-position)))
+ (forward-line 1)
+ (if (eq matches (count-matches char (line-beginning-position) (line-end-position)))
+ (progn
+ (setq done t)
+ (setq erg char))
+ (goto-char pos)
+ (forward-char 1)))
+ erg))
+
(defun org-table-convert-region (beg0 end0 &optional separator)
"Convert region to a table.
-The region goes from BEG0 to END0, but these borders will be moved
-slightly, to make sure a beginning of line in the first line is included.
-
-SEPARATOR specifies the field separator in the lines. It can have the
-following values:
-
-'(4) Use the comma as a field separator
-'(16) Use a TAB as field separator
-integer When a number, use that many spaces as field separator
-nil When nil, the command tries to be smart and figure out the
- separator in the following way:
- - when each line contains a TAB, assume TAB-separated material
- - when each line contains a comma, assume CSV material
- - else, assume one or more SPACE characters as separator."
- (interactive "rP")
+
+Optional arg SEPARATOR prompts user to specify the separator char. "
+ (interactive "r\nP")
(let* ((beg (min beg0 end0))
- (end (max beg0 end0))
- re)
+ (end (copy-marker (max beg0 end0)))
+ (separator (cond ((and separator (stringp separator))
+ separator)
+ ((eq 4 (prefix-numeric-value separator))
+ (read-from-minibuffer "Separator char: ")))))
(goto-char beg)
- (beginning-of-line 1)
- (setq beg (move-marker (make-marker) (point)))
- (goto-char end)
- (if (bolp) (backward-char 1) (end-of-line 1))
- (setq end (move-marker (make-marker) (point)))
- ;; Get the right field separator
- (unless separator
- (goto-char beg)
- (setq separator
- (cond
- ((not (re-search-forward "^[^\n\t]+$" end t)) '(16))
- ((not (re-search-forward "^[^\n,]+$" end t)) '(4))
- (t 1))))
+ (unless separator (setq separator (org-guess-separator)))
(goto-char beg)
- (if (equal separator '(4))
- (while (< (point) end)
- ;; parse the csv stuff
- (cond
- ((looking-at "^") (insert "| "))
- ((looking-at "[ \t]*$") (replace-match " |") (beginning-of-line 2))
- ((looking-at "[ \t]*\"\\([^\"\n]*\\)\"")
- (replace-match "\\1")
- (if (looking-at "\"") (insert "\"")))
- ((looking-at "[^,\n]+") (goto-char (match-end 0)))
- ((looking-at "[ \t]*,") (replace-match " | "))
- (t (beginning-of-line 2))))
- (setq re (cond
- ((equal separator '(4)) "^\\|\"?[ \t]*,[ \t]*\"?")
- ((equal separator '(16)) "^\\|\t")
- ((integerp separator)
- (if (< separator 1)
- (error "Number of spaces in separator must be >= 1")
- (format "^ *\\| *\t *\\| \\{%d,\\}" separator)))
- (t (error "This should not happen"))))
- (while (re-search-forward re end t)
- (replace-match "| " t t)))
+ (while (re-search-forward separator end t)
+ (replace-match " | " t t))
(goto-char beg)
(org-table-align)))
-(defun org-table-import (file arg)
+(defun org-table-import (file &optional arg)
"Import FILE as a table.
-The file is assumed to be tab-separated. Such files can be produced by most
-spreadsheet and database applications. If no tabs (at least one per line)
-are found, lines will be split on whitespace into fields."
+
+Field separator is guessed by `org-guess-separator', value of `org-table-import-default-separator' is tried first.
+
+With optional \\[universal-argument] a prompt takes the separator, avoids guessing. "
(interactive "f\nP")
(or (bolp) (newline))
(let ((beg (point))
@@ -546,7 +552,6 @@ are found, lines will be split on whitespace into fields."
(insert-file-contents file)
(org-table-convert-region beg (+ (point) (- (point-max) pm)) arg)))
-
(defvar org-table-last-alignment)
(defvar org-table-last-column-widths)
(defun org-table-export (&optional file format)
@@ -1609,7 +1614,6 @@ should be done in reverse order."
(org-table-goto-column thiscol)
(message "%d lines sorted, based on column %d" (length lns) column)))
-
(defun org-table-cut-region (beg end)
"Copy region in table to the clipboard and blank all relevant fields.
If there is no active region, use just the field at point."
@@ -3813,7 +3817,6 @@ Use COMMAND to do the motion, repeat if necessary to end up in a data line."
;; active, this binding is ignored inside tables and replaced with a
;; modified self-insert.
-
(defvar orgtbl-mode-map (make-keymap)
"Keymap for `orgtbl-mode'.")
@@ -3979,7 +3982,6 @@ to execute outside of tables."
(orgtbl-make-binding 'org-table-previous-field 104
[(shift tab)] [(tab)] "\C-i"))
-
(unless (featurep 'xemacs)
(org-defkey orgtbl-mode-map [S-iso-lefttab]
(orgtbl-make-binding 'org-table-previous-field 107
@@ -4462,7 +4464,6 @@ Valid parameters are
:sep Separator between two fields
:remove-nil-lines Do not include lines that evaluate to nil.
-
Each in the following group may be either a string or a function
of no arguments returning a string:
:tstart String to start the table. Ignored when :splice is t.
^ permalink raw reply related [flat|nested] 2+ messages in thread
* Re: limitations of hard-coded field separator removed
2012-02-23 13:25 limitations of hard-coded field separator removed Andreas Röhler
@ 2012-03-27 21:44 ` Bastien
0 siblings, 0 replies; 2+ messages in thread
From: Bastien @ 2012-03-27 21:44 UTC (permalink / raw)
To: Andreas Röhler; +Cc: emacs-orgmode
Hi Andreas,
Andreas Röhler <andreas.roehler@easy-emacs.de> writes:
> attached a org-table-import.patch
Thanks for this patch.
I tested it against the latest master and got this error:
,----
| Debugger entered--Lisp error: (wrong-type-argument stringp nil)
| re-search-forward(nil #<marker at 57 in tst.org> t)
| (while (re-search-forward separator end t) (replace-match " | " t t))
| (let* ((beg (min beg0 end0)) (end (copy-marker (max beg0 end0))) (separator (cond ((and separator (stringp separator)) separator) ((eq 4 (prefix-numeric-value separator)) (read-from-minibuffer "Separator char: "))))) (goto-char beg) (unless separator (setq separator (org-guess-separator))) (goto-char beg) (while (re-search-forward separator end t) (replace-match " | " t t)) (goto-char beg) (org-table-align))
| org-table-convert-region(14 57 nil)
| call-interactively(org-table-convert-region t nil)
| execute-extended-command(nil)
| call-interactively(execute-extended-command nil nil)
`----
Besides this problem...
> removes limitations of hard-coded separator char(s).
> `org-guess-separator' accepts and detects all chars as field separators.
> It works based on the assumption, that char looked for appears in equal
> number at each row.
This looks clever, but is it safe? E.g. if cells of the table are
surrounded by double-quotes, you'll have the same number of quotes in
each row, but the double-quote char by itself is _not_ the separator.
So I'm not sure about this heuristic -- I want to hear/test more.
> Also a default value
> `org-table-import-default-separator' should make
> guessing faster in some cases.
That's an easier way and I would favor it: keep the current behavior,
but add an option for a list of separators that the user can set.
Org would then test this list sequentially.
What do you think?
--
Bastien
^ permalink raw reply [flat|nested] 2+ messages in thread
end of thread, other threads:[~2012-03-27 23:46 UTC | newest]
Thread overview: 2+ messages (download: mbox.gz, follow: Atom feed)
-- links below jump to the message on this page --
2012-02-23 13:25 limitations of hard-coded field separator removed Andreas Röhler
2012-03-27 21:44 ` Bastien
Code repositories for project(s) associated with this external index
https://git.savannah.gnu.org/cgit/emacs.git
https://git.savannah.gnu.org/cgit/emacs/org-mode.git
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.