From 99b128c6d0e7dc8af6ae3430364f3c92d2686009 Mon Sep 17 00:00:00 2001 From: Micah Chalmer Date: Sat, 25 Jul 2015 17:21:44 -0400 Subject: Fix slowness in angle bracket matching The problem was caused by the logic that made it refontify the whole buffer when "font-lock-syntactically-fontified" was set to a position before the start of the region to be potentially fontified. Unfortunately that variable is not reliably set when fontifying a large buffer. Fortunately, the new logic is much simpler, and font-lock already takes care of ensuring that everything before font-lock-beg was syntactically fontified. The other problem was calling syntax-ppss on positions known not to be fontified yet. This fixes both of these issues, and the angle bracket matching now works on larger buffers without pausing on every keystroke. --- rust-mode.el | 196 ++++++++++++++++++++++++++++++----------------------------- 1 file changed, 99 insertions(+), 97 deletions(-) diff --git a/rust-mode.el b/rust-mode.el index e451dfa..9e20aa2 100644 --- a/rust-mode.el +++ b/rust-mode.el @@ -428,24 +428,18 @@ part of it. Adjusts to include the r[#] of a raw string as well." - (let ((orig-beg font-lock-beg) - (orig-end font-lock-end)) - (cond - ;; If we are not syntactically fontified yet, we cannot correctly cover - ;; anything less than the full buffer. The syntactic fontification - ;; modifies the syntax, so until it's done we can't use the syntax to - ;; determine what to fontify. 
- ((< (or font-lock-syntactically-fontified 0) font-lock-end) - (setq font-lock-beg 1) - (setq font-lock-end (buffer-end 1))) - - ((let* ((beg-ppss (syntax-ppss font-lock-beg)) - (beg-in-cmnt (and (nth 4 beg-ppss) (nth 8 beg-ppss))) - (beg-in-str (nth 3 beg-ppss)) - (end-ppss (syntax-ppss font-lock-end)) - (end-in-str (nth 3 end-ppss))) - - (when (and beg-in-str (> font-lock-beg (nth 8 beg-ppss))) + (save-excursion + (let ((orig-beg font-lock-beg) + (orig-end font-lock-end)) + + (let* + ;; It's safe to call `syntax-ppss' here on positions that are + ;; already syntactically fontified + ((beg-ppss (syntax-ppss font-lock-beg)) + (beg-in-cmnt (and beg-ppss (nth 4 beg-ppss) (nth 8 beg-ppss))) + (beg-in-str (and beg-ppss (nth 3 beg-ppss) (nth 8 beg-ppss)))) + + (when (and beg-in-str (>= font-lock-beg beg-in-str)) (setq font-lock-beg (nth 8 beg-ppss)) (while (equal ?# (char-before font-lock-beg)) (setq font-lock-beg (1- font-lock-beg))) @@ -453,18 +447,24 @@ (setq font-lock-beg (1- font-lock-beg)))) (when (and beg-in-cmnt (> font-lock-beg beg-in-cmnt)) - (setq font-lock-beg beg-in-cmnt)) - - (when end-in-str - (save-excursion - (goto-char (nth 8 end-ppss)) - (ignore-errors (forward-sexp)) - (setq font-lock-end (max font-lock-end (point))))) - ))) - - (or (/= font-lock-beg orig-beg) - (/= font-lock-end orig-end)) - )) + (setq font-lock-beg beg-in-cmnt))) + + ;; We need to make sure that if the region ends inside a raw string, we + ;; extend it out past the end of it. But we can't use `syntax-ppss' to + ;; detect that, because that depends on font-lock already being done, and we + ;; are trying to figure out how much to font-lock before that. So we use + ;; the regexp directly. 
+ (save-match-data + (goto-char font-lock-beg) + (while (and (< (point) font-lock-end) + (re-search-forward rust-re-non-standard-string (buffer-end 1) t) + (<= (match-beginning 0) font-lock-end)) + (setq font-lock-end (max font-lock-end (match-end 0))) + (goto-char (1+ (match-beginning 0))))) + + (or (/= font-lock-beg orig-beg) + (/= font-lock-end orig-end)) + ))) (defun rust-conditional-re-search-forward (regexp bound condition) ;; Search forward for regexp (with bound). If found, call condition and return the found @@ -492,77 +492,79 @@ (set-match-data (nth 1 ret-list)) (nth 0 ret-list)))) +(defconst rust-re-non-standard-string + (rx + (or + ;; Raw string: if it matches, it ends up with the starting character + ;; of the string as group 1, any ending backslashes as group 4, and + ;; the ending character as either group 5 or group 6. + (seq + ;; The "r" starts the raw string. Capture it as group 1 to mark it as such syntactically: + (group "r") + + ;; Then either: + (or + ;; a sequence at least one "#" (followed by quote). Capture all + ;; but the last "#" as group 2 for this case. + (seq (group (* "#")) "#\"") + + ;; ...or a quote without any "#". Capture it as group 3. This is + ;; used later to match the opposite quote only if this capture + ;; occurred + (group "\"")) + + ;; The contents of the string: + (*? 
anything) + + ;; If there are any backslashes at the end of the string, capture + ;; them as group 4 so we can suppress the normal escape syntax + ;; parsing: + (group (* "\\")) + + ;; Then the end of the string--the backreferences ensure that we + ;; only match the kind of ending that corresponds to the beginning + ;; we had: + (or + ;; There were "#"s - capture the last one as group 5 to mark it as + ;; the end of the string: + (seq "\"" (backref 2) (group "#")) + + ;; No "#"s - capture the ending quote (using a backref to group 3, + ;; so that we can't match a quote if we had "#"s) as group 6 + (group (backref 3)))) + + ;; Character literal: match the beginning ' of a character literal + ;; as group 7, and the ending one as group 8 + (seq + (group "'") + (or + (seq + "\\" + (or + (: "U" (= 8 xdigit)) + (: "u" (= 4 xdigit)) + (: "x" (= 2 xdigit)) + (any "'nrt0\"\\"))) + (not (any "'\\")) + ) + (group "'")) + ) + )) + (defun rust-look-for-non-standard-string (bound) ;; Find a raw string or character literal, but only if it's not in the middle ;; of another string or a comment. - (let* ((non-standard-str-regexp - (rx - (or - ;; Raw string: if it matches, it ends up with the starting character - ;; of the string as group 1, any ending backslashes as group 4, and - ;; the ending character as either group 5 or group 6. - (seq - ;; The "r" starts the raw string. Capture it as group 1 to mark it as such syntactically: - (group "r") - - ;; Then either: - (or - ;; a sequence at least one "#" (followed by quote). Capture all - ;; but the last "#" as group 2 for this case. - (seq (group (* "#")) "#\"") - - ;; ...or a quote without any "#". Capture it as group 3. This is - ;; used later to match the opposite quote only if this capture - ;; occurred - (group "\"")) - - ;; The contents of the string: - (*? 
anything) - - ;; If there are any backslashes at the end of the string, capture - ;; them as group 4 so we can suppress the normal escape syntax - ;; parsing: - (group (* "\\")) - - ;; Then the end of the string--the backreferences ensure that we - ;; only match the kind of ending that corresponds to the beginning - ;; we had: - (or - ;; There were "#"s - capture the last one as group 5 to mark it as - ;; the end of the string: - (seq "\"" (backref 2) (group "#")) - - ;; No "#"s - capture the ending quote (using a backref to group 3, - ;; so that we can't match a quote if we had "#"s) as group 6 - (group (backref 3)))) - - ;; Character literal: match the beginning ' of a character literal - ;; as group 7, and the ending one as group 8 - (seq - (group "'") - (or - (seq - "\\" - (or - (: "U" (= 8 xdigit)) - (: "u" (= 4 xdigit)) - (: "x" (= 2 xdigit)) - (any "'nrt0\"\\"))) - (not (any "'\\")) - ) - (group "'")) - ) - ))) - (rust-conditional-re-search-forward - non-standard-str-regexp bound - (lambda () - (let ((pstate (syntax-ppss (match-beginning 0)))) - (not - (or - (nth 4 pstate) ;; Skip if in a comment - (and (nth 3 pstate) (wholenump (nth 8 pstate)) (< (nth 8 pstate) (match-beginning 0))) ;; Skip if in a string that isn't starting here - ))))))) + (rust-conditional-re-search-forward + rust-re-non-standard-string + bound + (lambda () + (let ((pstate (syntax-ppss (match-beginning 0)))) + (not + (or + (nth 4 pstate) ;; Skip if in a comment + (and (nth 3 pstate) (wholenump (nth 8 pstate)) (< (nth 8 pstate) (match-beginning 0))) ;; Skip if in a string that isn't starting here + )))))) (defun rust-syntax-class-before-point () (when (> (point) 1) -- cgit v1.2.3 From ec3855f1c6895eb12f140a84f9dd98392c4566a8 Mon Sep 17 00:00:00 2001 From: Micah Chalmer Date: Thu, 30 Jul 2015 20:52:06 -0400 Subject: Recognize runaway raw strings Recognize raw strings all the way to the end of the buffer if they are not closed. 
This is not valid rust code, but the highlighting should show the mistake. This also eliminates glitchy behavior that can occur in this situation. Emacs assumes that edits can't change syntax at positions before the edit, and raw strings without this change violated this. --- rust-mode-tests.el | 35 +++++++++++++++++++++++++++++++++++ rust-mode.el | 5 ++++- 2 files changed, 39 insertions(+), 1 deletion(-) diff --git a/rust-mode-tests.el b/rust-mode-tests.el index 5312f38..8f25f06 100644 --- a/rust-mode-tests.el +++ b/rust-mode-tests.el @@ -1110,6 +1110,41 @@ this_is_not_a_string();)" (should (equal nil (get-text-property 28 'face))) ;; Semicolon--should not be part of the string )) +(ert-deftest font-lock-runaway-raw-string () + (rust-test-font-lock + "const Z = r#\"my raw string\";\n// oops this is still in the string" + '("const" font-lock-keyword-face + "Z" font-lock-type-face + "r#\"my raw string\";\n// oops this is still in the string" font-lock-string-face)) + ) + +(ert-deftest font-lock-recognize-closing-raw-string () + (with-temp-buffer + (rust-mode) + (insert "const foo = r##\" +1...............................................50 +1...............................................50 +1...............................................50 +1...............195-->\"; let ...................50 +1...............................................50 +1...............................................50 +1...............................................50 +1...............................................50 +1...............................................50 +1......................500......................50 +\"#; +") + (font-lock-fontify-buffer) + (goto-char 530) + (insert "#") + ;; We have now closed the raw string. 
Check that the whole string is + ;; recognized after the change + (font-lock-after-change-function (1- (point)) (point) 0) + (should (equal 'font-lock-string-face (get-text-property 195 'face))) ;; The "let" + (should (equal 'font-lock-string-face (get-text-property 500 'face))) ;; The "500" + (should (equal nil (get-text-property 531 'face))) ;; The second ";" + )) + ;;; Documentation comments (ert-deftest font-lock-doc-line-comment-parent () diff --git a/rust-mode.el b/rust-mode.el index 9e20aa2..9899ad1 100644 --- a/rust-mode.el +++ b/rust-mode.el @@ -531,7 +531,10 @@ ;; No "#"s - capture the ending quote (using a backref to group 3, ;; so that we can't match a quote if we had "#"s) as group 6 - (group (backref 3)))) + (group (backref 3)) + + ;; If the raw string wasn't actually closed, go all the way to the end + string-end)) ;; Character literal: match the beginning ' of a character literal ;; as group 7, and the ending one as group 8 -- cgit v1.2.3