diff --git a/CHANGELOG.md b/CHANGELOG.md index 80a7f02..09b696c 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,6 +2,25 @@ ## main (unreleased) +## 0.5.0 (2025-06-04) + +- [#96](https://github.com/clojure-emacs/clojure-ts-mode/pull/96): Highlight function name properly in `extend-protocol` form. +- [#96](https://github.com/clojure-emacs/clojure-ts-mode/pull/96): Add support for extend-protocol forms to `clojure-ts-add-arity` refactoring + command. +- [#99](https://github.com/clojure-emacs/clojure-ts-mode/pull/99): Improve navigation by s-expression by switching to an experimental + Clojure grammar. +- [#99](https://github.com/clojure-emacs/clojure-ts-mode/pull/99): More consistent docstrings highlighting and `fill-paragraph` behavior. +- [#99](https://github.com/clojure-emacs/clojure-ts-mode/pull/99): Fix bug in `clojure-ts-align` when nested form has extra spaces. +- [#99](https://github.com/clojure-emacs/clojure-ts-mode/pull/99): Fix bug in `clojure-ts-unwind` when there is only one expression after + threading symbol. +- [#103](https://github.com/clojure-emacs/clojure-ts-mode/issues/103): Introduce `clojure-ts-jank-use-cpp-parser` customization which allows + highlighting C++ syntax in Jank `native/raw` forms. +- [#103](https://github.com/clojure-emacs/clojure-ts-mode/issues/103): Introduce `clojure-ts-clojurescript-use-js-parser` customization which + allows highlighting JS syntax in ClojureScript `js*` forms. +- [#104](https://github.com/clojure-emacs/clojure-ts-mode/pull/104): Introduce the `clojure-ts-extra-def-forms` customization option to specify + additional `defn`-like forms that should be fontified. +- Introduce completion feature and `clojure-ts-completion-enabled` customization. + ## 0.4.0 (2025-05-15) - [#16](https://github.com/clojure-emacs/clojure-ts-mode/issues/16): Introduce `clojure-ts-align`. @@ -20,7 +39,7 @@ - [#92](https://github.com/clojure-emacs/clojure-ts-mode/pull/92): Add commands to convert between collections types. 
- [#93](https://github.com/clojure-emacs/clojure-ts-mode/pull/93): Introduce `clojure-ts-add-arity`. - [#94](https://github.com/clojure-emacs/clojure-ts-mode/pull/94): Add indentation rules and `clojure-ts-align` support for namespaced maps. -- Introduce `clojure-ts-cycle-conditional` and `clojure-ts-cycle-not`. +- [#95](https://github.com/clojure-emacs/clojure-ts-mode/pull/95): Introduce `clojure-ts-cycle-conditional` and `clojure-ts-cycle-not`. ## 0.3.0 (2025-04-15) @@ -68,37 +87,37 @@ ## 0.2.1 (2024-02-14) - [#36]: Rename all derived mode vars to match the package prefix. - - `clojurescript-ts-mode` -> `clojure-ts-clojurescript-mode` - - `clojurec-ts-mode` -> `clojure-ts-clojurec-mode` - - `clojure-dart-ts-mode` -> `clojure-ts-clojuredart-mode` - - `clojure-jank-ts-mode` -> `clojure-ts-jank-mode` + - `clojurescript-ts-mode` -> `clojure-ts-clojurescript-mode` + - `clojurec-ts-mode` -> `clojure-ts-clojurec-mode` + - `clojure-dart-ts-mode` -> `clojure-ts-clojuredart-mode` + - `clojure-jank-ts-mode` -> `clojure-ts-jank-mode` - [#30]: Add custom option `clojure-ts-toplevel-inside-comment-form` as an equivalent to `clojure-toplevel-inside-comment-form` in `clojure-mode`. - [#32]: Change behavior of `beginning-of-defun` and `end-of-defun` to consider all Clojure sexps as defuns. ## 0.2.0 - Pin grammar revision in treesit-language-source-alist - - [bd61a7fb281b7b0b1d2e20d19ab5d46cbcdc6c1e](https://github.com/clojure-emacs/clojure-ts-mode/commit/bd61a7fb281b7b0b1d2e20d19ab5d46cbcdc6c1e) -- Make font lock feature list more conforming with recommendations - - (See treesit-font-lock-level documentation for more information.) 
- - [2225190ee57ef667d69f2cd740e0137810bc38e7](https://github.com/clojure-emacs/clojure-ts-mode/commit/2225190ee57ef667d69f2cd740e0137810bc38e7) -- Highlight docstrings in interface, protocol, and variable definitions - - [9af0a6b35c708309acdfeb4c0c79061b0fd4eb44](https://github.com/clojure-emacs/clojure-ts-mode/commit/9af0a6b35c708309acdfeb4c0c79061b0fd4eb44) -- Add support for semantic indentation (now the default) - - [ae2e2486010554cfeb12f06a1485b4d81609d964](https://github.com/clojure-emacs/clojure-ts-mode/commit/ae2e2486010554cfeb12f06a1485b4d81609d964) - - [ca3914aa7aa9645ab244658f8db781cc6f95111e](https://github.com/clojure-emacs/clojure-ts-mode/commit/ca3914aa7aa9645ab244658f8db781cc6f95111e) - - [85871fdbc831b3129dae5762e9c247d453c35e15](https://github.com/clojure-emacs/clojure-ts-mode/commit/85871fdbc831b3129dae5762e9c247d453c35e15) - - [ff5d7e13dc53cc5da0e8139b04e02d90f61d9065](https://github.com/clojure-emacs/clojure-ts-mode/commit/ff5d7e13dc53cc5da0e8139b04e02d90f61d9065) + - [bd61a7fb281b7b0b1d2e20d19ab5d46cbcdc6c1e](https://github.com/clojure-emacs/clojure-ts-mode/commit/bd61a7fb281b7b0b1d2e20d19ab5d46cbcdc6c1e) +Make font lock feature list more conforming with recommendations + - (See treesit-font-lock-level documentation for more information.) 
+ - [2225190ee57ef667d69f2cd740e0137810bc38e7](https://github.com/clojure-emacs/clojure-ts-mode/commit/2225190ee57ef667d69f2cd740e0137810bc38e7) +Highlight docstrings in interface, protocol, and variable definitions + - [9af0a6b35c708309acdfeb4c0c79061b0fd4eb44](https://github.com/clojure-emacs/clojure-ts-mode/commit/9af0a6b35c708309acdfeb4c0c79061b0fd4eb44) +Add support for semantic indentation (now the default) + - [ae2e2486010554cfeb12f06a1485b4d81609d964](https://github.com/clojure-emacs/clojure-ts-mode/commit/ae2e2486010554cfeb12f06a1485b4d81609d964) + - [ca3914aa7aa9645ab244658f8db781cc6f95111e](https://github.com/clojure-emacs/clojure-ts-mode/commit/ca3914aa7aa9645ab244658f8db781cc6f95111e) + - [85871fdbc831b3129dae5762e9c247d453c35e15](https://github.com/clojure-emacs/clojure-ts-mode/commit/85871fdbc831b3129dae5762e9c247d453c35e15) + - [ff5d7e13dc53cc5da0e8139b04e02d90f61d9065](https://github.com/clojure-emacs/clojure-ts-mode/commit/ff5d7e13dc53cc5da0e8139b04e02d90f61d9065) - Highlight "\`quoted-symbols\` in docs strings like this." - - This feature uses a nested markdown parser. + - This feature uses a nested markdown parser. If the parser is not available this feature should be silently disabled. - [9af0a6b35c708309acdfeb4c0c79061b0fd4eb44](https://github.com/clojure-emacs/clojure-ts-mode/commit/9af0a6b35c708309acdfeb4c0c79061b0fd4eb44) - Highlight methods for `deftype`, `defrecord`, `defprotocol`, `reify` and `definterface` forms ([#20](https://github.com/clojure-emacs/clojure-ts-mode/issues/20)). - - [5231c348e509cff91edd1ec59d7a59645395da15](https://github.com/clojure-emacs/clojure-ts-mode/commit/5231c348e509cff91edd1ec59d7a59645395da15) - - Thank you rrudakov for this contribution. + - [5231c348e509cff91edd1ec59d7a59645395da15](https://github.com/clojure-emacs/clojure-ts-mode/commit/5231c348e509cff91edd1ec59d7a59645395da15) + - Thank you rrudakov for this contribution. 
- Add derived `clojure-jank-ts-mode` for the [Jank](https://github.com/jank-lang/jank) dialect of clojure - - [a7b9654488693cdc9057a91410f74de42a397d1b](https://github.com/clojure-emacs/clojure-ts-mode/commit/a7b9654488693cdc9057a91410f74de42a397d1b) + - [a7b9654488693cdc9057a91410f74de42a397d1b](https://github.com/clojure-emacs/clojure-ts-mode/commit/a7b9654488693cdc9057a91410f74de42a397d1b) ## 0.1.5 @@ -112,18 +131,18 @@ ## 0.1.3 - Add custom option for highlighting comment macro body forms as comments. [ae3790adc0fc40ad905b8c30b152122991592a4e](https://github.com/clojure-emacs/clojure-ts-mode/commit/ae3790adc0fc40ad905b8c30b152122991592a4e) - - Defaults to OFF, highlighting comment body forms like any other expressions. - - Additionally, does a better job of better detecting comment macros by reducing false positives from forms like (not.clojure.core/comment) + - Defaults to OFF, highlighting comment body forms like any other expressions. + - Additionally, does a better job of better detecting comment macros by reducing false positives from forms like (not.clojure.core/comment) ## 0.1.2 - Add a syntax table from clojure-mode. [712dc772fd38111c1e35fe60e4dbe7ac83032bd6](https://github.com/clojure-emacs/clojure-ts-mode/commit/712dc772fd38111c1e35fe60e4dbe7ac83032bd6). - - Better support for `thing-at-point` driven functionality. - - Thank you @jasonjckn for this contribution. + - Better support for `thing-at-point` driven functionality. + - Thank you @jasonjckn for this contribution. 
- Add 3 derived major modes [4dc853df16ba09d10dc3a648865e681679c17606](https://github.com/clojure-emacs/clojure-ts-mode/commit/4dc853df16ba09d10dc3a648865e681679c17606) - - clojurescript-ts-mode - - clojurec-ts-mode - - clojure-dart-ts-mode + - clojurescript-ts-mode + - clojurec-ts-mode + - clojure-dart-ts-mode ## 0.1.1 diff --git a/README.md b/README.md index d10ca0c..727abb2 100644 --- a/README.md +++ b/README.md @@ -123,21 +123,51 @@ Once installed, evaluate `clojure-ts-mode.el` and you should be ready to go. > `clojure-ts-mode` install the required grammars automatically, so for most > people no manual actions will be required. -`clojure-ts-mode` makes use of two Tree-sitter grammars to work properly: +`clojure-ts-mode` makes use of the following Tree-sitter grammars: -- The Clojure grammar, mentioned earlier -- [markdown-inline](https://github.com/MDeiml/tree-sitter-markdown), which -will be used for docstrings if available and if `clojure-ts-use-markdown-inline` is enabled. +- The [experimental](https://github.com/sogaiu/tree-sitter-clojure/tree/unstable-20250526) version Clojure grammar. This version includes a few + improvements, which potentially will be promoted to a stable release (See [the + discussion](https://github.com/sogaiu/tree-sitter-clojure/issues/65)). This grammar is required for proper work of `clojure-ts-mode`. +- [markdown-inline](https://github.com/MDeiml/tree-sitter-markdown), which will be used for docstrings if available and if + `clojure-ts-use-markdown-inline` is enabled. +- [tree-sitter-regex](https://github.com/tree-sitter/tree-sitter-regex/releases/tag/v0.24.3), which will be used for regex literals if available and if + `clojure-ts-use-regex-parser` is not `nil`. + +`clojure-ts-clojurescript-mode` can optionally use `tree-sitter-javascript` grammar +to highlight JS syntax in `js*` forms. This is enabled by default and can be +turned off by setting `clojure-ts-clojurescript-use-js-parser` to `nil`. 
+ +`clojure-ts-jank-mode` can optionally use `tree-sitter-cpp` grammar to highlight C++ +syntax in `native/raw` forms. This is enabled by default and can be turned off by +setting `clojure-ts-jank-use-cpp-parser` to `nil`. If you have `git` and a C compiler (`cc`) available on your system's `PATH`, `clojure-ts-mode` will install the grammars when you first open a Clojure file and `clojure-ts-ensure-grammars` is -set to `t` (the default). +set to `t` (the default). macOS users can install the required tools like this: + +```shell +xcode-select --install +``` + +Similarly, Debian/Ubuntu users can do something like: + +```shell +sudo apt install build-essential +``` + +This installs GCC, G++, `make`, and other essential development tools. If `clojure-ts-mode` fails to automatically install the grammar, you have the -option to install it manually, Please, refer to the installation instructions of -each required grammar and make sure you're install the versions expected. (see -`clojure-ts-grammar-recipes` for details) +option to install it manually. Please refer to the installation instructions of +each required grammar and make sure you install the expected versions (see +`clojure-ts-grammar-recipes` for details). + +If `clojure-ts-ensure-grammars` is enabled, `clojure-ts-mode` will try to upgrade +the Clojure grammar if it's outdated. This might happen when you activate +`clojure-ts-mode` for the first time after a package update. If the grammar was +previously installed, you might need to restart Emacs, because it has to reload +the grammar binary. ### Upgrading Tree-sitter grammars @@ -179,7 +209,7 @@ interactively change this behavior. Set the var `clojure-ts-indent-style` to change it. ``` emacs-lisp -(setq clojure-ts-indent-style 'fixed) +(setopt clojure-ts-indent-style 'fixed) ``` > [!TIP] @@ -231,7 +261,7 @@ Note that `clojure-ts-semantic-indent-rules` should be set using the customization interface or `setopt`; otherwise, it will not be applied correctly. 
-#### Project local indentation +#### Project-specific indentation Custom indentation rules can be set for individual projects. To achieve this, you need to create a `.dir-locals.el` file in the project root. The content @@ -243,7 +273,7 @@ should look like: ``` In order to apply directory-local variables to existing buffers, they must be -reverted. +"reverted" (reloaded). ### Vertical alignment @@ -286,7 +316,7 @@ Forms that can be aligned vertically are configured via the following variables: To highlight entire rich `comment` expression with the comment font face, set ``` emacs-lisp -(setq clojure-ts-comment-macro-font-lock-body t) +(setopt clojure-ts-comment-macro-font-lock-body t) ``` By default this is `nil`, so that anything within a `comment` expression is @@ -300,6 +330,40 @@ highlighted like regular Clojure code. > section](https://www.gnu.org/software/emacs/manual/html_node/emacs/Parser_002dbased-Font-Lock.html) > of the Emacs manual for more details. +#### Extending font-lock rules + +In `clojure-ts-mode` it is possible to specify additional defn-like forms that +should be fontified. For example to highlight the following form from Hiccup +library as a function definition: + +```clojure +(defelem file-upload + "Creates a file upload input." + [name] + (input-field "file" name nil)) +``` + +You can add `defelem` to `clojure-ts-extra-def-forms` list like this: + +```emacs-lisp +(add-to-list 'clojure-ts-extra-def-forms "defelem") +``` + +or set this variable using `setopt`: + +```emacs-lisp +(setopt clojure-ts-extra-def-forms '("defelem")) +``` + +This setting will highlight `defelem` symbol, function name and the docstring. + +> [!IMPORTANT] +> +> Setting `clojure-ts-extra-def-forms` won't change the indentation rule for +> these forms. For indentation rules you should use +> `clojure-ts-semantic-indent-rules` variable (see [semantic +> indentation](#customizing-semantic-indentation) section). 
+ ### Highlight markdown syntax in docstrings By default Markdown syntax is highlighted in the docstrings using @@ -332,7 +396,7 @@ Example of regex syntax highlighting: To make forms inside of `(comment ...)` forms appear as top-level forms for evaluation and navigation, set ``` emacs-lisp -(setq clojure-ts-toplevel-inside-comment-form t) +(setopt clojure-ts-toplevel-inside-comment-form t) ``` ### Fill paragraph @@ -448,7 +512,7 @@ set. The following commands are available: `clojure-ts-add-arity`: Add a new arity to an existing single-arity or multi-arity function or macro. Function can be defined using `defn`, `fn` or `defmethod` form. This command also supports functions defined inside forms like -`letfn`, `defprotol`, `reify` or `proxy`. +`letfn`, `defprotocol`, `reify`, `extend-protocol` or `proxy`. ### Default keybindings @@ -475,6 +539,21 @@ multi-arity function or macro. Function can be defined using `defn`, `fn` or By default prefix for all refactoring commands is `C-c C-r`. It can be changed by customizing `clojure-ts-refactor-map-prefix` variable. +## Code completion + +`clojure-ts-mode` provides basic code completion functionality. Completion only +works for the current source buffer and includes completion of top-level +definitions and local bindings. This feature can be turned off by setting: + +```emacs-lisp +(setopt clojure-ts-completion-enabled nil) +``` + +Here's a short video illustrating the feature with Emacs's built-in completion UI (it +should also work well with more advanced packages like `company` and `corfu`): + +https://github.com/user-attachments/assets/7c37179f-5a5d-424f-9bd6-9c8525f6b2f7 + ## Migrating to clojure-ts-mode If you are migrating to `clojure-ts-mode` note that `clojure-mode` is still @@ -512,17 +591,12 @@ and `clojure-mode` (this is very helpful when dealing with `derived-mode-p` chec - Navigation by sexp/lists might work differently on Emacs versions lower than 31. 
Starting with version 31, Emacs uses Tree-sitter 'things' settings, if available, to rebind some commands. -- The indentation of list elements with metadata is inconsistent with other - collections. This inconsistency stems from the grammar's interpretation of - nearly every definition or function call as a list. Therefore, modifying the - indentation for list elements would adversely affect the indentation of - numerous other forms. ## Frequently Asked Questions ### What `clojure-mode` features are currently missing? -As of version 0.4.x, `clojure-ts-mode` provides almost all `clojure-mode` features. +As of version 0.5.x, `clojure-ts-mode` provides almost all `clojure-mode` features. Currently only a few refactoring commands are missing. ### Does `clojure-ts-mode` work with CIDER? @@ -548,7 +622,15 @@ Check out [this article](https://metaredux.com/posts/2024/02/19/cider-preliminar ### Does `clojure-ts-mode` work with `inf-clojure`? -Currently, there is an [open PR](https://github.com/clojure-emacs/inf-clojure/pull/215) adding support for inf-clojure. +Yes, it does. `inf-clojure` 3.3+ supports `clojure-ts-mode`. + +### Why does `clojure-ts-mode` require Emacs 30? + +You might be wondering why `clojure-ts-mode` requires Emacs 30 instead of +Emacs 29, which introduced the built-in Tree-sitter support. The answer is +simple - the initial Tree-sitter support in Emacs 29 had quite a few issues and +we felt it's better to nudge most people interested in using it to Emacs 30, +which fixed a lot of the problems. ## License diff --git a/clojure-ts-mode.el b/clojure-ts-mode.el index a4263b4..6930a54 100644 --- a/clojure-ts-mode.el +++ b/clojure-ts-mode.el @@ -7,7 +7,7 @@ ;; Maintainer: Bozhidar Batsov ;; URL: http://github.com/clojure-emacs/clojure-ts-mode ;; Keywords: languages clojure clojurescript lisp -;; Version: 0.4.0 +;; Version: 0.5.0 ;; Package-Requires: ((emacs "30.1")) ;; This file is not part of GNU Emacs. 
@@ -74,7 +74,7 @@ :link '(emacs-commentary-link :tag "Commentary" "clojure-mode")) (defconst clojure-ts-mode-version - "0.4.0" + "0.5.0" "The current version of `clojure-ts-mode'.") (defcustom clojure-ts-comment-macro-font-lock-body nil @@ -128,6 +128,18 @@ double quotes on the third column." :safe #'booleanp :package-version '(clojure-ts-mode . "0.4")) +(defcustom clojure-ts-clojurescript-use-js-parser t + "When non-nil, use JS grammar to highlight syntax in js* forms." + :type 'boolean + :safe #'booleanp + :package-version '(clojure-ts-mode . "0.5")) + +(defcustom clojure-ts-jank-use-cpp-parser t + "When non-nil, use C++ grammar to highlight syntax in native/raw forms." + :type 'boolean + :safe #'booleanp + :package-version '(clojure-ts-mode . "0.5")) + (defcustom clojure-ts-auto-remap t "When non-nil, redirect all `clojure-mode' buffers to `clojure-ts-mode'." :safe #'booleanp @@ -248,6 +260,18 @@ values like this: :safe #'booleanp :type 'boolean) +(defcustom clojure-ts-extra-def-forms nil + "List of forms that should be fontified the same way as defn." + :package-version '(clojure-ts-mode . "0.5") + :safe #'listp + :type '(repeat string)) + +(defcustom clojure-ts-completion-enabled t + "Enable built-in completion feature." + :package-version '(clojure-ts-mode . "0.5") + :safe #'booleanp + :type 'boolean) + (defvar clojure-ts-mode-remappings '((clojure-mode . clojure-ts-mode) (clojurescript-mode . 
clojure-ts-clojurescript-mode) @@ -306,7 +330,7 @@ Only intended for use at development time.") "Syntax table for `clojure-ts-mode'.") (defconst clojure-ts--builtin-dynamic-var-regexp - (eval-and-compile + (eval-when-compile (concat "^" (regexp-opt '("*1" "*2" "*3" "*agent*" @@ -323,7 +347,7 @@ Only intended for use at development time.") "$"))) (defconst clojure-ts--builtin-symbol-regexp - (eval-and-compile + (eval-when-compile (concat "^" (regexp-opt '("do" "if" "let*" "var" @@ -341,7 +365,7 @@ Only intended for use at development time.") "defmulti" "defn" "defn-" "defonce" "defprotocol" "defrecord" "defstruct" "deftype" "delay" "doall" "dorun" "doseq" "dosync" "dotimes" "doto" - "extend-protocol" "extend-type" + "extend-protocol" "extend-type" "extend" "for" "future" "gen-class" "gen-interface" "if-let" "if-not" "if-some" "import" "in-ns""io!" @@ -371,72 +395,71 @@ Only intended for use at development time.") "Return a regular expression that matches one of SYMBOLS exactly." (concat "^" (regexp-opt symbols) "$")) -(defvar clojure-ts-function-docstring-symbols - '("definline" - "defmulti" - "defmacro" - "defn" - "defn-" - "defprotocol" - "ns") +(defconst clojure-ts-function-docstring-symbols + (rx line-start + (or "definline" + "defmulti" + "defmacro" + "defn" + "defn-" + "defprotocol" + "ns") + line-end) "Symbols that accept an optional docstring as their second argument.") -(defvar clojure-ts-definition-docstring-symbols - '("def") +(defconst clojure-ts-definition-docstring-symbols + (rx line-start "def" line-end) "Symbols that accept an optional docstring as their second argument. Any symbols added here should only treat their second argument as a docstring if a third argument (the value) is provided. 
\"def\" is the only builtin Clojure symbol that behaves like this.") (defconst clojure-ts--variable-definition-symbol-regexp - (eval-and-compile - (rx line-start (or "def" "defonce") line-end)) + (rx line-start (or "def" "defonce") line-end) "A regular expression matching a symbol used to define a variable.") (defconst clojure-ts--typedef-symbol-regexp - (eval-and-compile - (rx line-start - (or "defprotocol" "defmulti" "deftype" "defrecord" - "definterface" "defmethod" "defstruct") - line-end)) + (rx line-start + (or "defprotocol" "defmulti" "deftype" "defrecord" + "definterface" "defmethod" "defstruct") + line-end) "A regular expression matching a symbol used to define a type.") (defconst clojure-ts--type-symbol-regexp - (eval-and-compile - (rx line-start - (or "deftype" "defrecord" - ;; While not reifying, helps with doc strings - "defprotocol" "definterface" - "reify" "proxy" "extend-type" "extend-protocol") - line-end)) + (rx line-start + (or "deftype" "defrecord" + ;; While not reifying, helps with doc strings + "defprotocol" "definterface" + "reify" "proxy" "extend-type" "extend-protocol") + line-end) "A regular expression matching a symbol used to define or instantiate a type.") (defconst clojure-ts--interface-def-symbol-regexp - (eval-and-compile - (rx line-start (or "defprotocol" "definterface") line-end)) + (rx line-start (or "defprotocol" "definterface") line-end) "A regular expression matching a symbol used to define an interface.") (defun clojure-ts--docstring-query (capture-symbol) "Return a query that captures docstrings with CAPTURE-SYMBOL." `(;; Captures docstrings in def - ((list_lit :anchor (meta_lit) :? + ((list_lit :anchor [(comment) (meta_lit) (old_meta_lit)] :* :anchor (sym_lit) @_def_symbol - :anchor (comment) :? - :anchor (sym_lit) ; variable name - :anchor (comment) :? 
- :anchor (str_lit) ,capture-symbol - :anchor (_)) ; the variable's value - (:match ,(clojure-ts-symbol-regexp clojure-ts-definition-docstring-symbols) + :anchor [(comment) (meta_lit) (old_meta_lit)] :* + ;; Variable name + :anchor (sym_lit) + :anchor [(comment) (meta_lit) (old_meta_lit)] :* + :anchor (str_lit (str_content) ,capture-symbol) @font-lock-doc-face + ;; The variable's value + :anchor (_)) + (:match ,clojure-ts-definition-docstring-symbols @_def_symbol)) ;; Captures docstrings in metadata of definitions - ((list_lit :anchor (sym_lit) @_def_symbol - :anchor (comment) :? - :anchor (sym_lit - (meta_lit - value: (map_lit - (kwd_lit) @_doc-keyword - :anchor - (str_lit) ,capture-symbol)))) + ((list_lit :anchor [(comment) (meta_lit) (old_meta_lit)] :* + :anchor (sym_lit) @_def_symbol + :anchor (comment) :* + :anchor (meta_lit + value: (map_lit + (kwd_lit) @_doc-keyword + :anchor (str_lit (str_content) ,capture-symbol) @font-lock-doc-face))) ;; We're only supporting this on a fixed set of defining symbols ;; Existing regexes don't encompass def and defn ;; Naming another regex is very cumbersome. @@ -448,22 +471,36 @@ if a third argument (the value) is provided. @_def_symbol) (:equal @_doc-keyword ":doc")) ;; Captures docstrings defn, defmacro, ns, and things like that - ((list_lit :anchor (meta_lit) :? + ((list_lit :anchor [(comment) (meta_lit) (old_meta_lit)] :* :anchor (sym_lit) @_def_symbol - :anchor (comment) :? - :anchor (sym_lit) ; function_name - :anchor (comment) :? 
- :anchor (str_lit) ,capture-symbol) - (:match ,(clojure-ts-symbol-regexp clojure-ts-function-docstring-symbols) + :anchor [(comment) (meta_lit) (old_meta_lit)] :* + ;; Function_name + :anchor (sym_lit) + :anchor [(comment) (meta_lit) (old_meta_lit)] :* + :anchor (str_lit (str_content) ,capture-symbol) @font-lock-doc-face) + (:match ,clojure-ts-function-docstring-symbols + @_def_symbol)) + ((list_lit :anchor [(comment) (meta_lit) (old_meta_lit)] :* + :anchor (sym_lit) @_def_symbol + :anchor [(comment) (meta_lit) (old_meta_lit)] :* + ;; Function_name + :anchor (sym_lit) + :anchor [(comment) (meta_lit) (old_meta_lit)] :* + :anchor (str_lit (str_content) ,capture-symbol) @font-lock-doc-face) + (:match ,(clojure-ts-symbol-regexp clojure-ts-extra-def-forms) @_def_symbol)) ;; Captures docstrings in defprotcol, definterface - ((list_lit :anchor (sym_lit) @_def_symbol - (list_lit - :anchor (sym_lit) (vec_lit) :* - (str_lit) ,capture-symbol :anchor) + ((list_lit :anchor [(comment) (meta_lit) (old_meta_lit)] :* + :anchor (sym_lit) @_def_symbol + (list_lit :anchor (sym_lit) (vec_lit) :* + (str_lit (str_content) ,capture-symbol) @font-lock-doc-face) :*) (:match ,clojure-ts--interface-def-symbol-regexp @_def_symbol)))) +(defconst clojure-ts--match-docstring-query + (treesit-query-compile 'clojure (clojure-ts--docstring-query '@font-lock-doc-face)) + "Precompiled query that matches a Clojure docstring.") + (defun clojure-ts--treesit-range-settings (use-markdown-inline use-regex) "Return value for `treesit-range-settings' for `clojure-ts-mode'. @@ -476,16 +513,45 @@ When USE-REGEX is non-nil, include range settings for regex parser." (treesit-range-rules :embed 'markdown-inline :host 'clojure - :offset '(1 . -1) :local t (clojure-ts--docstring-query '@capture))) (when use-regex (treesit-range-rules :embed 'regex :host 'clojure - :offset '(2 . 
-1) :local t - '((regex_lit) @capture))))) + '((regex_content) @capture))))) + +(defun clojure-ts--fontify-string (node override _start _end &optional _rest) + "Fontify string content NODE with `font-lock-string-face'. + +In order to support embedded syntax highlighting for JS in ClojureScript +and C++ in Jank we need to avoid fontifying string content in some +special forms, such as native/raw in Jank and js* in ClojureScript, +otherwise string face will interfere with embedded parser's faces. + +This function respects OVERRIDE argument by passing it to +`treesit-fontify-with-override'. + +START and END arguments that are passed to this function are not start +and end of the NODE, so we ignore them." + (let* ((prev (treesit-node-prev-sibling (treesit-node-parent node))) + ;; TODO: Seems jank has removed this syntax, so we might drop this + ;; after jank 1.0 gets released + ;; See https://github.com/jank-lang/jank/issues/24#issuecomment-2924460595 + (jank-native-p (and (derived-mode-p 'clojure-ts-jank-mode) + clojure-ts-jank-use-cpp-parser + (clojure-ts--symbol-node-p prev) + (string= (treesit-node-text prev) "native/raw"))) + (js-interop-p (and (derived-mode-p 'clojure-ts-clojurescript-mode) + clojure-ts-clojurescript-use-js-parser + (clojure-ts--symbol-node-p prev) + (string= (treesit-node-text prev) "js*")))) + (when (not (or jank-native-p js-interop-p)) + (treesit-fontify-with-override (treesit-node-start node) + (treesit-node-end node) + 'font-lock-string-face + override)))) (defun clojure-ts--font-lock-settings (markdown-available regex-available) "Return font lock settings suitable for use in `treesit-font-lock-settings'. @@ -499,7 +565,9 @@ literals with regex grammar." 
(treesit-font-lock-rules :feature 'string :language 'clojure - '((str_lit) @font-lock-string-face + '((str_lit open: _ @font-lock-string-face + (str_content) @clojure-ts--fontify-string + close: _ @font-lock-string-face) (regex_lit) @font-lock-regexp-face) :feature 'regex @@ -531,19 +599,21 @@ literals with regex grammar." ;; `clojure.core'. :feature 'builtin :language 'clojure - `(((list_lit meta: _ :* :anchor (sym_lit !namespace name: (sym_name) @font-lock-keyword-face)) + `(((list_lit :anchor [(comment) (meta_lit) (old_meta_lit)] :* + :anchor (sym_lit !namespace name: (sym_name) @font-lock-keyword-face)) (:match ,clojure-ts--builtin-symbol-regexp @font-lock-keyword-face)) - ((list_lit meta: _ :* :anchor - (sym_lit namespace: ((sym_ns) @ns - (:equal "clojure.core" @ns)) - name: (sym_name) @font-lock-keyword-face)) + ((list_lit :anchor [(comment) (meta_lit) (old_meta_lit)] :* + :anchor (sym_lit namespace: ((sym_ns) @ns + (:equal "clojure.core" @ns)) + name: (sym_name) @font-lock-keyword-face)) (:match ,clojure-ts--builtin-symbol-regexp @font-lock-keyword-face)) - ((anon_fn_lit meta: _ :* :anchor (sym_lit !namespace name: (sym_name) @font-lock-keyword-face)) + ((anon_fn_lit :anchor [(comment) (meta_lit) (old_meta_lit)] :* + :anchor (sym_lit !namespace name: (sym_name) @font-lock-keyword-face)) (:match ,clojure-ts--builtin-symbol-regexp @font-lock-keyword-face)) - ((anon_fn_lit meta: _ :* :anchor - (sym_lit namespace: ((sym_ns) @ns - (:equal "clojure.core" @ns)) - name: (sym_name) @font-lock-keyword-face)) + ((anon_fn_lit :anchor [(comment) (meta_lit) (old_meta_lit)] :* + :anchor (sym_lit namespace: ((sym_ns) @ns + (:equal "clojure.core" @ns)) + name: (sym_name) @font-lock-keyword-face)) (:match ,clojure-ts--builtin-symbol-regexp @font-lock-keyword-face)) ((sym_name) @font-lock-builtin-face (:match ,clojure-ts--builtin-dynamic-var-regexp @font-lock-builtin-face))) @@ -565,8 +635,9 @@ literals with regex grammar." 
;; No wonder the tree-sitter-clojure grammar only touches syntax, and not semantics :feature 'definition ;; defn and defn like macros :language 'clojure - `(((list_lit :anchor meta: _ :* + `(((list_lit :anchor [(comment) (meta_lit) (old_meta_lit)] :* :anchor (sym_lit (sym_name) @font-lock-keyword-face) + :anchor [(comment) (meta_lit) (old_meta_lit)] :* :anchor (sym_lit (sym_name) @font-lock-function-name-face)) (:match ,(rx-to-string `(seq bol @@ -579,30 +650,44 @@ literals with regex grammar." "deftest" "deftest-" "defmacro" - "definline") + "definline" + "defonce") eol)) @font-lock-keyword-face)) + ((list_lit :anchor [(comment) (meta_lit) (old_meta_lit)] :* + :anchor (sym_lit (sym_name) @font-lock-keyword-face) + :anchor [(comment) (meta_lit) (old_meta_lit)] :* + :anchor (sym_lit (sym_name) @font-lock-function-name-face)) + (:match ,(clojure-ts-symbol-regexp clojure-ts-extra-def-forms) + @font-lock-keyword-face)) ((anon_fn_lit marker: "#" @font-lock-property-face)) ;; Methods implementation + ((list_lit + :anchor [(comment) (meta_lit) (old_meta_lit)] :* + :anchor ((sym_lit name: (sym_name) @def) + ((:match ,(rx-to-string + `(seq bol + (or + "defrecord" + "definterface" + "deftype" + "defprotocol") + eol)) + @def))) + :anchor [(comment) (meta_lit) (old_meta_lit)] :* + :anchor (sym_lit (sym_name) @font-lock-type-face) + (list_lit + (sym_lit name: (sym_name) @font-lock-function-name-face)))) ((list_lit ((sym_lit name: (sym_name) @def) ((:match ,(rx-to-string `(seq bol - (or - "defrecord" - "definterface" - "deftype" - "defprotocol") + (or "reify" + "extend-protocol" + "extend-type") eol)) @def))) - :anchor - (sym_lit (sym_name) @font-lock-type-face) - (list_lit - (sym_lit name: (sym_name) @font-lock-function-name-face)))) - ((list_lit - ((sym_lit name: (sym_name) @def) - ((:equal "reify" @def))) (list_lit (sym_lit name: (sym_name) @font-lock-function-name-face)))) ;; letfn @@ -615,8 +700,9 @@ literals with regex grammar." 
:feature 'variable ;; def, defonce :language 'clojure - `(((list_lit :anchor meta: _ :* + `(((list_lit :anchor [(comment) (meta_lit) (old_meta_lit)] :* :anchor (sym_lit (sym_name) @font-lock-keyword-face) + :anchor [(comment) (meta_lit) (old_meta_lit)] :* :anchor (sym_lit (sym_name) @font-lock-variable-name-face)) (:match ,clojure-ts--variable-definition-symbol-regexp @font-lock-keyword-face))) @@ -664,7 +750,7 @@ literals with regex grammar." (treesit-font-lock-rules :feature 'doc :language 'markdown-inline - :override t + :override 'prepend `([((image_description) @link) ((link_destination) @font-lock-constant-face) ((code_span) @font-lock-constant-face) @@ -739,6 +825,7 @@ literals with regex grammar." `((comment) @font-lock-comment-face (dis_expr marker: "#_" @font-lock-comment-delimiter-face + meta: (meta_lit) :* @font-lock-comment-face value: _ @font-lock-comment-face) (,(append '(list_lit :anchor (sym_lit) @font-lock-comment-delimiter-face) @@ -783,7 +870,8 @@ literals with regex grammar." (defun clojure-ts--metadata-node-p (node) "Return non-nil if NODE is a Clojure metadata node." - (string-equal "meta_lit" (treesit-node-type node))) + (or (string-equal "meta_lit" (treesit-node-type node)) + (string-equal "old_meta_lit" (treesit-node-type node)))) (defun clojure-ts--var-node-p (node) "Return non-nil if NODE is a var (eg. #\\'foo)." @@ -816,11 +904,16 @@ Skip the optional metadata node at pos 0 if present." n) t))) -(defun clojure-ts--node-with-metadata-parent (node) - "Return parent for NODE only if NODE has metadata, otherwise return nil." - (when-let* ((prev-sibling (treesit-node-prev-sibling node)) - ((clojure-ts--metadata-node-p prev-sibling))) - (treesit-node-parent (treesit-node-parent node)))) +(defun clojure-ts--first-value-child (node) + "Return the first value child of the given NODE. 
+ +In the syntax tree, there are a few types of possible child nodes: +unnamed standalone nodes (e.g., comments), anonymous nodes (e.g., +opening or closing parentheses), and named nodes. Named nodes are +standalone nodes that are labeled by a specific name. The most common +names are meta and value. This function skips any unnamed, anonymous, +and metadata nodes and returns the first value node." + (treesit-node-child-by-field-name node "value")) (defun clojure-ts--symbol-matches-p (symbol-regexp node) "Return non-nil if NODE is a symbol that matches SYMBOL-REGEXP." @@ -864,7 +957,7 @@ See `clojure-ts--definition-node-p' when an exact match is possible." (defun clojure-ts--standard-definition-node-name (node) "Return the definition name for the given NODE. -Returns nil if NODE is not a list with symbols as the first two +Return nil if NODE is not a list with symbols as the first two children. For example the node representing the expression (def foo 1) would return foo. The node representing (ns user) would return user. Does not do any matching on the first symbol (def, defn, etc), so @@ -888,7 +981,7 @@ Can be called directly, but intended for use as `treesit-defun-name-function'." (defun clojure-ts--kwd-definition-node-name (node) "Return the keyword name for the given NODE. -Returns nil if NODE is not a list where the first element is a symbol +Return nil if NODE is not a list where the first element is a symbol and the second is a keyword. For example, a node representing the expression (s/def ::foo int?) would return foo. @@ -1038,6 +1131,7 @@ The possible values for this variable are ("try" . ((:block 0))) ("with-out-str" . ((:block 0))) ("defprotocol" . ((:block 1) (:inner 1))) + ("definterface" . ((:block 1) (:inner 1))) ("binding" . ((:block 1))) ("case" . ((:block 1))) ("cond->" . ((:block 1))) @@ -1294,31 +1388,31 @@ indentation rule in `clojure-ts--semantic-indent-rules-defaults' or according to the rule. If NODE is nil, use next node after BOL." 
(and (or (clojure-ts--list-node-p parent) (clojure-ts--anon-fn-node-p parent)) - (let* ((first-child (clojure-ts--node-child-skip-metadata parent 0))) + (let* ((first-child (clojure-ts--first-value-child parent))) (when-let* ((rule (clojure-ts--find-semantic-rule node parent 0))) - (and (not (clojure-ts--match-with-metadata node)) - (let ((rule-type (car rule)) - (rule-value (cadr rule))) - (if (equal rule-type :block) - (if (zerop rule-value) - ;; Special treatment for block 0 rule. - (clojure-ts--match-block-0-body bol first-child) - (clojure-ts--node-pos-match-block node parent bol rule-value)) - ;; Return true for any inner rule. - t))))))) + (let ((rule-type (car rule)) + (rule-value (cadr rule))) + (if (equal rule-type :block) + (if (zerop rule-value) + ;; Special treatment for block 0 rule. + (clojure-ts--match-block-0-body bol first-child) + (clojure-ts--node-pos-match-block node parent bol rule-value)) + ;; Return true for any inner rule. + t)))))) (defun clojure-ts--match-function-call-arg (node parent _bol) "Match NODE if PARENT is a list expressing a function or macro call." (and (or (clojure-ts--list-node-p parent) (clojure-ts--anon-fn-node-p parent)) - ;; Can the following two clauses be replaced by checking indexes? - ;; Does the second child exist, and is it not equal to the current node? - (clojure-ts--node-child-skip-metadata parent 1) - (not (treesit-node-eq (clojure-ts--node-child-skip-metadata parent 1) node)) - (let ((first-child (clojure-ts--node-child-skip-metadata parent 0))) - (or (clojure-ts--symbol-node-p first-child) - (clojure-ts--keyword-node-p first-child) - (clojure-ts--var-node-p first-child))))) + (let ((first-child (clojure-ts--first-value-child parent)) + (second-child (clojure-ts--node-child-skip-metadata parent 1))) + (and first-child + ;; Does the second child exist, and is it not equal to the current node? 
+ second-child + (not (treesit-node-eq second-child node)) + (or (clojure-ts--symbol-node-p first-child) + (clojure-ts--keyword-node-p first-child) + (clojure-ts--var-node-p first-child)))))) (defvar clojure-ts--threading-macro (eval-and-compile @@ -1331,55 +1425,25 @@ according to the rule. If NODE is nil, use next node after BOL." ;; If not, then align function arg. (and (or (clojure-ts--list-node-p parent) (clojure-ts--anon-fn-node-p parent)) - (let ((first-child (treesit-node-child parent 0 t))) + (let ((first-child (clojure-ts--first-value-child parent))) (clojure-ts--symbol-matches-p clojure-ts--threading-macro first-child)))) -(defun clojure-ts--match-fn-docstring (node) - "Match NODE when it is a docstring for PARENT function definition node." - ;; A string that is the third node in a function defn block - (let ((parent (treesit-node-parent node))) - (and (treesit-node-eq node (treesit-node-child parent 2 t)) - (let ((first-auncle (treesit-node-child parent 0 t))) - (clojure-ts--symbol-matches-p - (regexp-opt clojure-ts-function-docstring-symbols) - first-auncle))))) - -(defun clojure-ts--match-def-docstring (node) - "Match NODE when it is a docstring for PARENT variable definition node." - ;; A string that is the fourth node in a variable definition block. - (let ((parent (treesit-node-parent node))) - (and (treesit-node-eq node (treesit-node-child parent 2 t)) - ;; There needs to be a value after the string. - ;; If there is no 4th child, then this string is the value. - (treesit-node-child parent 3 t) - (let ((first-auncle (treesit-node-child parent 0 t))) - (clojure-ts--symbol-matches-p - (regexp-opt clojure-ts-definition-docstring-symbols) - first-auncle))))) - -(defun clojure-ts--match-method-docstring (node) - "Match NODE when it is a docstring in a method definition." 
- (let* ((grandparent (treesit-node-parent ;; the protocol/interface - (treesit-node-parent node))) ;; the method definition - (first-grandauncle (treesit-node-child grandparent 0 t))) - (clojure-ts--symbol-matches-p - clojure-ts--interface-def-symbol-regexp - first-grandauncle))) - (defun clojure-ts--match-docstring (_node parent _bol) "Match PARENT when it is a docstring node." - (and (clojure-ts--string-node-p parent) ;; We are IN a string - (or (clojure-ts--match-def-docstring parent) - (clojure-ts--match-fn-docstring parent) - (clojure-ts--match-method-docstring parent)))) + (when-let* ((top-level-node (treesit-parent-until parent 'defun t)) + (result (treesit-query-capture top-level-node + clojure-ts--match-docstring-query))) + (seq-find (lambda (elt) + (and (eq (car elt) 'font-lock-doc-face) + (treesit-node-eq (cdr elt) parent))) + result))) (defun clojure-ts--match-with-metadata (node &optional _parent _bol) "Match NODE when it has metadata." - (let ((prev-sibling (treesit-node-prev-sibling node))) - (and prev-sibling - (clojure-ts--metadata-node-p prev-sibling)))) + (when-let* ((prev-sibling (treesit-node-prev-sibling node))) + (clojure-ts--metadata-node-p prev-sibling))) (defun clojure-ts--anchor-parent-opening-paren (_node parent _bol) "Return position of PARENT start for NODE. @@ -1393,21 +1457,10 @@ for forms with type hints." (treesit-search-subtree #'clojure-ts--opening-paren-node-p nil t 1) (treesit-node-start))) -(defun clojure-ts--match-collection-item-with-metadata (node-type) - "Return a matcher for a collection item with metadata by NODE-TYPE. - -The returned matcher accepts NODE, PARENT and BOL and returns true only -if NODE has metadata and its parent has type NODE-TYPE." - (lambda (node _parent _bol) - (string-equal node-type - (treesit-node-type - (clojure-ts--node-with-metadata-parent node))))) - (defun clojure-ts--anchor-nth-sibling (n) "Return the start of the Nth child of PARENT skipping metadata." 
(lambda (_n parent &rest _) - (treesit-node-start - (clojure-ts--node-child-skip-metadata parent n)))) + (treesit-node-start (treesit-node-child parent n t)))) (defun clojure-ts--semantic-indent-rules () "Return a list of indentation rules for `treesit-simple-indent-rules'. @@ -1419,20 +1472,6 @@ regexes with anchors matching the beginning and end of the line are used." `((clojure ((parent-is "^source$") parent-bol 0) - (clojure-ts--match-docstring parent 0) - ;; Collections items with metadata. - ;; - ;; This should be before `clojure-ts--match-with-metadata', otherwise they - ;; will never be matched. - (,(clojure-ts--match-collection-item-with-metadata "^vec_lit$") grand-parent 1) - (,(clojure-ts--match-collection-item-with-metadata "^map_lit$") grand-parent 1) - (,(clojure-ts--match-collection-item-with-metadata "^set_lit$") grand-parent 2) - ;; - ;; If we enable this rule for lists, it will break many things. - ;; (,(clojure-ts--match-collection-item-with-metadata "list_lit") grand-parent 1) - ;; - ;; All other forms with metadata. - (clojure-ts--match-with-metadata parent 0) ;; Literal Sequences ((parent-is "^vec_lit$") parent 1) ;; https://guide.clojure.style/#bindings-alignment ((parent-is "^map_lit$") parent 1) ;; https://guide.clojure.style/#map-keys-alignment @@ -1448,7 +1487,14 @@ used." ;; https://guide.clojure.style/#vertically-align-fn-args (clojure-ts--match-function-call-arg ,(clojure-ts--anchor-nth-sibling 1) 0) ;; https://guide.clojure.style/#one-space-indent - ((parent-is "^list_lit$") parent 1)))) + ((parent-is "^list_lit$") parent 1) + ((parent-is "^anon_fn_lit$") parent 2) + (clojure-ts--match-with-metadata parent 0) + ;; This is slow and only matches when point is inside of a docstring and + ;; only when Markdown grammar is disabled. `indent-region' tries to match + ;; all the rules from top to bottom, so order matters here (the slowest + ;; rules should be at the bottom). 
+ (clojure-ts--match-docstring parent 0)))) (defun clojure-ts--configured-indent-rules () "Gets the configured choice of indent rules." @@ -1491,7 +1537,7 @@ of the first symbol of a functional literal NODE." (when (or (clojure-ts--list-node-p node) (and include-anon-fn-lit (clojure-ts--anon-fn-node-p node))) - (when-let* ((first-child (clojure-ts--node-child-skip-metadata node 0)) + (when-let* ((first-child (clojure-ts--first-value-child node)) ((clojure-ts--symbol-node-p first-child))) (clojure-ts--named-node-text first-child)))) @@ -1521,18 +1567,28 @@ function literal." "map_lit" "ns_map_lit" "vec_lit" "set_lit") "A regular expression that matches nodes that can be treated as lists.") +(defconst clojure-ts--defun-symbols-regex + (rx bol + (or "def" + "defn" + "defn-" + "definline" + "defrecord" + "defmacro" + "defmulti" + "defonce" + "defprotocol" + "deftest" + "deftest-" + "ns" + "definterface" + "deftype" + "defstruct") + eol)) + (defun clojure-ts--defun-node-p (node) "Return TRUE if NODE is a function or a var definition." - (clojure-ts--list-node-sym-match-p node - (rx bol - (or "def" - "defn" - "defn-" - "definline" - "defrecord" - "defmacro" - "defmulti") - eol))) + (clojure-ts--list-node-sym-match-p node clojure-ts--defun-symbols-regex)) (defconst clojure-ts--markdown-inline-sexp-nodes '("inline_link" "full_reference_link" "collapsed_reference_link" @@ -1540,10 +1596,22 @@ function literal." "code_span") "Nodes representing s-expressions in the `markdown-inline' parser.") +(defun clojure-ts--default-sexp-node-p (node) + "Return TRUE if point is after the # marker of set or function literal NODE." 
+ (and (eq (char-before) ?\#) + (string-match-p (rx bol (or "anon_fn_lit" "set_lit") eol) + (treesit-node-type (treesit-node-parent node))))) + (defconst clojure-ts--thing-settings `((clojure (sexp ,(regexp-opt clojure-ts--sexp-nodes)) (list ,(regexp-opt clojure-ts--list-nodes)) + ;; `sexp-default' thing allows to fallback to the default implementation of + ;; `forward-sexp' function where `treesit-forward-sexp' produces undesired + ;; results. + (sexp-default + ;; For `C-M-f' in "#|(a)" or "#|{1 2 3}" + (,(rx (or "(" "{")) . ,#'clojure-ts--default-sexp-node-p)) (text ,(regexp-opt '("comment"))) (defun ,#'clojure-ts--defun-node-p)) (when clojure-ts-use-markdown-inline @@ -1593,11 +1661,11 @@ BOUND bounds the whitespace search." (point)) (when-let* ((cur-sexp (treesit-node-first-child-for-pos root-node (point) t))) (goto-char (treesit-node-start cur-sexp)) - (if (and (string= "sym_lit" (treesit-node-type cur-sexp)) - (clojure-ts--metadata-node-p (treesit-node-child cur-sexp 0 t)) - (and (not (treesit-node-child-by-field-name cur-sexp "value")) - (string-empty-p (clojure-ts--named-node-text cur-sexp)))) - (treesit-end-of-thing 'sexp 2 'restricted) + (if (clojure-ts--metadata-node-p cur-sexp) + (progn + (treesit-end-of-thing 'sexp 1 'restricted) + (just-one-space) + (treesit-end-of-thing 'sexp 1 'restricted)) (treesit-end-of-thing 'sexp 1 'restricted)) (when (looking-at-p ",") (forward-char)) @@ -1630,6 +1698,41 @@ BOUND bounds the whitespace search." 
sexp-end t))) +(defvar clojure-ts--align-query + (treesit-query-compile 'clojure + `(((map_lit) @map) + ((ns_map_lit) @ns-map) + ((list_lit + ((sym_lit) @sym + (:match ,(clojure-ts-symbol-regexp clojure-ts-align-binding-forms) @sym)) + (vec_lit) @bindings-vec)) + ((list_lit + :anchor + ((sym_lit) @sym + (:match ,(rx bol (or "for" "doseq") eol) @sym)) + (vec_lit + ((kwd_lit) @kwd + (:equal ":let" @kwd)) + :anchor + (vec_lit) @bindings-vec))) + ((list_lit + ((sym_lit) @sym + (:match ,(clojure-ts-symbol-regexp clojure-ts-align-cond-forms) @sym))) + @cond) + ((anon_fn_lit + ((sym_lit) @sym + (:match ,(clojure-ts-symbol-regexp clojure-ts-align-binding-forms) @sym)) + (vec_lit) @bindings-vec)) + ((anon_fn_lit + ((sym_lit) @sym + (:match ,(clojure-ts-symbol-regexp clojure-ts-align-cond-forms) @sym))) + @cond)))) + +(defvar clojure-ts--align-reader-conditionals-query + (treesit-query-compile 'clojure + '(((read_cond_lit) @read-cond) + ((splicing_read_cond_lit) @read-cond)))) + (defun clojure-ts--get-nodes-to-align (beg end) "Return a plist of nodes data for alignment. @@ -1644,31 +1747,15 @@ have changed." ;; By default `treesit-query-capture' captures all nodes that cross the range. ;; We need to restrict it to only nodes inside of the range. 
(let* ((region-node (clojure-ts--region-node beg end)) - (query (treesit-query-compile 'clojure - (append - `(((map_lit) @map) - ((ns_map_lit) @ns-map) - ((list_lit - ((sym_lit) @sym - (:match ,(clojure-ts-symbol-regexp clojure-ts-align-binding-forms) @sym)) - (vec_lit) @bindings-vec)) - ((list_lit - ((sym_lit) @sym - (:match ,(clojure-ts-symbol-regexp clojure-ts-align-cond-forms) @sym))) - @cond) - ((anon_fn_lit - ((sym_lit) @sym - (:match ,(clojure-ts-symbol-regexp clojure-ts-align-binding-forms) @sym)) - (vec_lit) @bindings-vec)) - ((anon_fn_lit - ((sym_lit) @sym - (:match ,(clojure-ts-symbol-regexp clojure-ts-align-cond-forms) @sym))) - @cond)) - (when clojure-ts-align-reader-conditionals - '(((read_cond_lit) @read-cond) - ((splicing_read_cond_lit) @read-cond))))))) - (thread-last (treesit-query-capture region-node query beg end) + (nodes (append (treesit-query-capture region-node clojure-ts--align-query beg end) + (when clojure-ts-align-reader-conditionals + (treesit-query-capture region-node clojure-ts--align-reader-conditionals-query beg end))))) + (thread-last nodes (seq-remove (lambda (elt) (eq (car elt) 'sym))) + ;; Reverse the result to align the most deeply nested nodes + ;; first. This way we can prevent breaking alignment of outer + ;; nodes. + (seq-reverse) ;; When first node is reindented, all other nodes become ;; outdated. Executing the entire query everytime is very ;; expensive, instead we use markers for every captured node to @@ -1908,6 +1995,7 @@ parenthesis." (delete-region beg (point)) ;; `raise-sexp' doesn't work properly for function literals (it loses one ;; of the parenthesis). Seems like an Emacs' bug. + (backward-up-list) (delete-pair)))) (defun clojure-ts--fix-sexp-whitespace () @@ -1947,19 +2035,25 @@ With universal argument \\[universal-argument], fully unwinds thread." 
(end (thread-first threading-sexp (treesit-node-end) (copy-marker)))) - (while (> n 0) - (cond - ((string-match-p (rx bol (* "some") "->" eol) sym) - (clojure-ts--unwind-thread-first)) - ((string-match-p (rx bol (* "some") "->>" eol) sym) - (clojure-ts--unwind-thread-last))) - (setq n (1- n)) - ;; After unwinding we check if it is the last expression and maybe - ;; splice it. - (when (clojure-ts--nothing-more-to-unwind) - (clojure-ts--pop-out-of-threading) - (clojure-ts--fix-sexp-whitespace) - (setq n 0))) + ;; If it's the last expression, just raise it out of the threading + ;; macro. + (if (clojure-ts--nothing-more-to-unwind) + (progn + (clojure-ts--pop-out-of-threading) + (clojure-ts--fix-sexp-whitespace)) + (while (> n 0) + (cond + ((string-match-p (rx bol (* "some") "->" eol) sym) + (clojure-ts--unwind-thread-first)) + ((string-match-p (rx bol (* "some") "->>" eol) sym) + (clojure-ts--unwind-thread-last))) + (setq n (1- n)) + ;; After unwinding we check if it is the last expression and maybe + ;; splice it. + (when (clojure-ts--nothing-more-to-unwind) + (clojure-ts--pop-out-of-threading) + (clojure-ts--fix-sexp-whitespace) + (setq n 0)))) (indent-region beg end) (delete-trailing-whitespace beg end))) (user-error "No threading form to unwind at point"))) @@ -2112,9 +2206,9 @@ type, etc. See `treesit-thing-settings' for more details." (defun clojure-ts--add-arity-internal (fn-node) "Add an arity to a function defined by FN-NODE." (let* ((first-coll (clojure-ts--node-child fn-node (rx bol (or "vec_lit" "list_lit") eol))) - (coll-start (clojure-ts--node-start-skip-metadata first-coll)) + (coll-start (treesit-node-start first-coll)) (line-parent (thread-first fn-node - (clojure-ts--node-child-skip-metadata 0) + (clojure-ts--first-value-child) (treesit-node-start) (line-number-at-pos))) (line-args (line-number-at-pos coll-start)) @@ -2133,7 +2227,7 @@ type, etc. See `treesit-thing-settings' for more details." 
(defun clojure-ts--add-arity-defprotocol-internal (fn-node) "Add an arity to a defprotocol function defined by FN-NODE." (let* ((args-vec (clojure-ts--node-child fn-node (rx bol "vec_lit" eol))) - (args-vec-start (clojure-ts--node-start-skip-metadata args-vec)) + (args-vec-start (treesit-node-start args-vec)) (line-parent (thread-first fn-node (clojure-ts--node-child-skip-metadata 0) (treesit-node-start) @@ -2153,7 +2247,7 @@ type, etc. See `treesit-thing-settings' for more details." (defun clojure-ts--add-arity-reify-internal (fn-node) "Add an arity to a reify function defined by FN-NODE." (let* ((fn-name (clojure-ts--list-node-sym-text fn-node))) - (goto-char (clojure-ts--node-start-skip-metadata fn-node)) + (goto-char (treesit-node-start fn-node)) (insert "(" fn-name " [])") (newline-and-indent) ;; Put the point between sqare brackets. @@ -2186,6 +2280,12 @@ type, etc. See `treesit-thing-settings' for more details." (and (clojure-ts--list-node-p node) (string= (clojure-ts--list-node-sym-text parent) "reify")))) +(defun clojure-ts--extend-protocol-defn-p (node) + "Return non-nil if NODE is a function definition in an extend-protocol form." + (when-let* ((parent (treesit-node-parent node))) + (and (clojure-ts--list-node-p node) + (string= (clojure-ts--list-node-sym-text parent) "extend-protocol")))) + (defun clojure-ts-add-arity () "Add an arity to a function or macro." (interactive) @@ -2196,6 +2296,7 @@ type, etc. See `treesit-thing-settings' for more details." "defmacro" "defmethod" "defprotocol" + "extend-protocol" "reify" "proxy") eol)) @@ -2210,13 +2311,16 @@ type, etc. See `treesit-thing-settings' for more details." 
(clojure-ts--parent-until #'clojure-ts--defprotocol-defn-p)) ((string= parent-def-sym "reify") (clojure-ts--parent-until #'clojure-ts--reify-defn-p)) + ((string= parent-def-sym "extend-protocol") + (clojure-ts--parent-until #'clojure-ts--extend-protocol-defn-p)) (t parent-def-node)))) (let ((beg-marker (copy-marker (treesit-node-start parent-def-node))) (end-marker (copy-marker (treesit-node-end parent-def-node)))) (cond ((string= parent-def-sym "defprotocol") (clojure-ts--add-arity-defprotocol-internal fn-node)) - ((string= parent-def-sym "reify") + ((or (string= parent-def-sym "reify") + (string= parent-def-sym "extend-protocol")) (clojure-ts--add-arity-reify-internal fn-node)) (t (clojure-ts--add-arity-internal fn-node))) (indent-region beg-marker end-marker)) @@ -2325,7 +2429,7 @@ before DELIM-OPEN." ("when" "when-not") ("when-not" "when")))) (save-excursion - (goto-char (clojure-ts--node-start-skip-metadata cond-node)) + (goto-char (treesit-node-start cond-node)) (down-list 1) (delete-char (length cond-sym)) (insert new-sym) @@ -2335,25 +2439,10 @@ before DELIM-OPEN." (indent-region beg end-marker))) (user-error "No conditional expression found"))) -(defun clojure-ts--point-outside-node-p (node) - "Return non-nil if point is outside of the actual NODE start. - -Clojure grammar treats metadata as part of an expression, so for example -^boolean (not (= 2 2)) is a single list node, including metadata. This -causes issues for functions that navigate by s-expressions and lists. -This function returns non-nil if point is outside of the outermost -parenthesis." - (let* ((actual-node-start (clojure-ts--node-start-skip-metadata node)) - (node-end (treesit-node-end node)) - (pos (point))) - (or (< pos actual-node-start) - (> pos node-end)))) - (defun clojure-ts-cycle-not () "Add or remove a not form around the current form." 
(interactive) - (if-let* ((list-node (clojure-ts--parent-until (rx bol "list_lit" eol))) - ((not (clojure-ts--point-outside-node-p list-node)))) + (if-let* ((list-node (clojure-ts--parent-until (rx bol "list_lit" eol)))) (let ((beg (treesit-node-start list-node)) (end-marker (copy-marker (treesit-node-end list-node))) (pos (copy-marker (point) t))) @@ -2426,10 +2515,131 @@ parenthesis." ["Fully thread a form with ->>" clojure-ts-thread-last-all] "--" ["Unwind once" clojure-ts-unwind] - ["Fully unwind a threading macro" clojure-ts-unwind-all]))) + ["Fully unwind a threading macro" clojure-ts-unwind-all]) + ["Version" clojure-mode-display-version])) map) "Keymap for `clojure-ts-mode'.") +;;; Completion + +(defconst clojure-ts--completion-query-defuns + (treesit-query-compile 'clojure + `((source + (list_lit + ((sym_lit) @sym + (:match ,clojure-ts--defun-symbols-regex @sym)) + :anchor [(comment) (meta_lit) (old_meta_lit)] :* + :anchor ((sym_lit) @defun-candidate))))) + "Query that matches top-level definitions.") + +(defconst clojure-ts--completion-defn-with-args-sym-regex + (rx bol + (or "defn" + "defn-" + "fn" + "fn*" + "defmacro" + "defmethod") + eol) + "Regexp that matches a symbol of definition with arguments vector.") + +(defconst clojure-ts--completion-let-like-sym-regex + (rx bol + (or "let" + "if-let" + "when-let" + "if-some" + "when-some" + "loop" + "with-open" + "dotimes" + "with-local-vars") + eol) + "Regexp that matches a symbol of let-like form.") + +(defconst clojure-ts--completion-locals-query + (treesit-query-compile 'clojure `((vec_lit (sym_lit) @local-candidate) + (map_lit (sym_lit) @local-candidate))) + "Query that matches a local binding symbol. + +Symbols must be a direct child of a vector or a map.
This query covers +bindings vector as well as destructuring syntax.") + +(defconst clojure-ts--completion-annotations + (list 'defun-candidate " Definition" + 'local-candidate " Local variable") + "Property list of completion candidate type and annotation string.") + +(defun clojure-ts--completion-annotation-function (candidate) + "Return annotation for a completion CANDIDATE." + (thread-last minibuffer-completion-table + (alist-get candidate) + (plist-get clojure-ts--completion-annotations))) + +(defun clojure-ts--completion-defun-with-args-node-p (node) + "Return non-nil if NODE is a function definition with arguments." + (when-let* ((sym-name (clojure-ts--list-node-sym-text node))) + (string-match-p clojure-ts--completion-defn-with-args-sym-regex sym-name))) + +(defun clojure-ts--completion-fn-args-nodes () + "Return a list of captured nodes that represent function arguments. + +The function traverses the syntax tree upwards and returns nodes from +all functions along the way." + (let ((parent-defun (clojure-ts--parent-until #'clojure-ts--completion-defun-with-args-node-p)) + (captured-nodes)) + (while parent-defun + (when-let* ((args-vec (clojure-ts--node-child parent-defun "vec_lit"))) + (setq captured-nodes + (append captured-nodes + (treesit-query-capture args-vec clojure-ts--completion-locals-query)))) + ;; Advance unconditionally: a form without a direct vec_lit child must not stall the loop. + (setq parent-defun (treesit-parent-until parent-defun #'clojure-ts--completion-defun-with-args-node-p))) + captured-nodes)) + +(defun clojure-ts--completion-let-like-node-p (node) + "Return non-nil if NODE is a let-like form." + (when-let* ((sym-name (clojure-ts--list-node-sym-text node))) + (string-match-p clojure-ts--completion-let-like-sym-regex sym-name))) + +(defun clojure-ts--completion-let-locals-nodes () + "Return a list of captured nodes that represent bindings in let forms. + +The function traverses the syntax tree upwards and returns nodes from +all let bindings found along the way."
+ (let ((parent-let (clojure-ts--parent-until #'clojure-ts--completion-let-like-node-p)) + (captured-nodes)) + (while parent-let + (when-let* ((bindings-vec (clojure-ts--node-child parent-let "vec_lit"))) + (setq captured-nodes + (append captured-nodes + (treesit-query-capture bindings-vec clojure-ts--completion-locals-query)))) + ;; Advance unconditionally: a form without a direct vec_lit child must not stall the loop. + (setq parent-let (treesit-parent-until parent-let #'clojure-ts--completion-let-like-node-p))) + captured-nodes)) + +(defun clojure-ts-completion-at-point-function () + "Return a completion table for the symbol around point." + (when-let* ((bounds (bounds-of-thing-at-point 'symbol)) + (source (treesit-buffer-root-node 'clojure)) + (nodes (append (treesit-query-capture source clojure-ts--completion-query-defuns) + (clojure-ts--completion-fn-args-nodes) + (clojure-ts--completion-let-locals-nodes)))) + (list (car bounds) + (cdr bounds) + (thread-last nodes + ;; Remove node at point + (seq-remove (lambda (item) (= (treesit-node-end (cdr item)) (point)))) + ;; Remove unwanted captured nodes + (seq-filter (lambda (item) + (not (member (car item) '(sym kwd))))) + ;; Produce alist of candidates + (seq-map (lambda (item) (cons (treesit-node-text (cdr item) t) (car item)))) + ;; Remove duplicated candidates + (seq-uniq)) + :exclusive 'no + :annotation-function #'clojure-ts--completion-annotation-function))) + (defvar clojure-ts-clojurescript-mode-map (let ((map (make-sparse-keymap))) (set-keymap-parent map clojure-ts-mode-map) @@ -2460,7 +2670,7 @@ parenthesis." (defconst clojure-ts-grammar-recipes '((clojure "https://github.com/sogaiu/tree-sitter-clojure.git" - "v0.0.13") + "unstable-20250526") (markdown-inline "https://github.com/MDeiml/tree-sitter-markdown" "v0.4.1" "tree-sitter-markdown-inline/src") @@ -2468,12 +2678,32 @@ parenthesis."
"v0.24.3")) "Intended to be used as the value for `treesit-language-source-alist'.") +;; TODO: Eventually this should be replaced with `treesit-query-valid-p' +(defun clojure-ts--query-valid-p (query) + "Return non-nil if QUERY is valid in Clojure, nil otherwise." + (ignore-errors + (treesit-query-compile 'clojure query t) + t)) + +(defun clojure-ts--clojure-grammar-outdated-p () + "Return TRUE if currently installed grammar is outdated. + +This function checks if `clojure-ts-mode' is compatible with the +currently installed grammar. The simplest way to do this is to validate +a query that is valid in a previous grammar version but invalid in the +required version." + (clojure-ts--query-valid-p '((sym_lit (meta_lit))))) + (defun clojure-ts--ensure-grammars () "Install required language grammars if not already available." (when clojure-ts-ensure-grammars (dolist (recipe clojure-ts-grammar-recipes) (let ((grammar (car recipe))) - (unless (treesit-language-available-p grammar nil) + (when (or (not (treesit-language-available-p grammar nil)) + ;; If Clojure grammar is available, but outdated, re-install + ;; it. + (and (equal grammar 'clojure) + (clojure-ts--clojure-grammar-outdated-p))) (message "Installing %s Tree-sitter grammar" grammar) ;; `treesit-language-source-alist' is dynamically scoped. ;; Binding it in this let expression allows @@ -2494,6 +2724,35 @@ function can also be used to upgrade the grammars if they are outdated." (let ((treesit-language-source-alist clojure-ts-grammar-recipes)) (treesit-install-language-grammar grammar))))) +(defun clojure-ts--harvest-treesit-configs (mode) + "Harvest tree-sitter configs from MODE. 
+Return a plist with the following keys and value: + + :font-lock (from `treesit-font-lock-settings') + :simple-indent (from `treesit-simple-indent-rules')" + (with-temp-buffer + (funcall mode) + (list :font-lock treesit-font-lock-settings + :simple-indent treesit-simple-indent-rules))) + +(defun clojure-ts--add-config-for-mode (mode) + "Add configurations for MODE to current buffer. + +Configuration includes font-lock and indent. For font-lock rules, use +the same features enabled in MODE." + (let ((configs (clojure-ts--harvest-treesit-configs mode))) + (setq treesit-font-lock-settings + (append treesit-font-lock-settings + (plist-get configs :font-lock))) + ;; FIXME: This works a bit aggressively. `indent-region' always tries to + ;; use rules for embedded parser. Without it users can format embedded code + ;; in an arbitrary way. + ;; + ;; (setq treesit-simple-indent-rules + ;; (append treesit-simple-indent-rules + ;; (plist-get configs :simple-indent))) + )) + (defun clojure-ts-mode-variables (&optional markdown-available regex-available) "Initialize buffer-local variables for `clojure-ts-mode'. @@ -2539,7 +2798,10 @@ REGEX-AVAILABLE." clojure-ts--imenu-settings) (when (boundp 'treesit-thing-settings) ;; Emacs 30+ - (setq-local treesit-thing-settings clojure-ts--thing-settings))) + (setq-local treesit-thing-settings clojure-ts--thing-settings)) + + (when clojure-ts-completion-enabled + (add-to-list 'completion-at-point-functions #'clojure-ts-completion-at-point-function))) ;;;###autoload (define-derived-mode clojure-ts-mode prog-mode "Clojure[TS]" @@ -2601,7 +2863,20 @@ REGEX-AVAILABLE." (define-derived-mode clojure-ts-clojurescript-mode clojure-ts-mode "ClojureScript[TS]" "Major mode for editing ClojureScript code. 
-\\{clojure-ts-clojurescript-mode-map}") +\\{clojure-ts-clojurescript-mode-map}" + (when (and clojure-ts-clojurescript-use-js-parser + (treesit-ready-p 'javascript t)) + (setq-local treesit-range-settings + (append treesit-range-settings + (treesit-range-rules + :embed 'javascript + :host 'clojure + :local t + '(((list_lit (sym_lit) @_sym-name + :anchor (str_lit (str_content) @capture)) + (:equal @_sym-name "js*")))))) + (clojure-ts--add-config-for-mode 'js-ts-mode) + (treesit-major-mode-setup))) ;;;###autoload (define-derived-mode clojure-ts-clojurec-mode clojure-ts-mode "ClojureC[TS]" @@ -2619,7 +2894,20 @@ REGEX-AVAILABLE." (define-derived-mode clojure-ts-jank-mode clojure-ts-mode "Jank[TS]" "Major mode for editing Jank code. -\\{clojure-ts-jank-mode-map}") +\\{clojure-ts-jank-mode-map}" + (when (and clojure-ts-jank-use-cpp-parser + (treesit-ready-p 'cpp t)) + (setq-local treesit-range-settings + (append treesit-range-settings + (treesit-range-rules + :embed 'cpp + :host 'clojure + :local t + '(((list_lit (sym_lit) @_sym-name + :anchor (str_lit (str_content) @capture)) + (:equal @_sym-name "native/raw")))))) + (clojure-ts--add-config-for-mode 'c++-ts-mode) + (treesit-major-mode-setup))) (defun clojure-ts--register-novel-modes () "Set up Clojure modes not present in progenitor clojure-mode.el." @@ -2671,11 +2959,15 @@ Useful if you want to switch to the `clojure-mode's mode mappings." 
(treesit-query-compile 'clojure '(((source (list_lit + :anchor [(comment) (meta_lit) (old_meta_lit)] :* :anchor (sym_lit name: (sym_name) @ns) + :anchor [(comment) (meta_lit) (old_meta_lit)] :* :anchor (sym_lit name: (sym_name) @ns-name))) (:equal @ns "ns")) ((source (list_lit + :anchor [(comment) (meta_lit) (old_meta_lit)] :* :anchor (sym_lit name: (sym_name) @in-ns) + :anchor [(comment) (meta_lit) (old_meta_lit)] :* :anchor (quoting_lit :anchor (sym_lit name: (sym_name) @ns-name)))) (:equal @in-ns "in-ns"))))) diff --git a/doc/design.md b/doc/design.md index 8afeaff..e1d6b05 100644 --- a/doc/design.md +++ b/doc/design.md @@ -32,29 +32,43 @@ In short: ## tree-sitter-clojure -Clojure-ts-mode uses the tree-sitter-clojure grammar, which can be found at -The clojure-ts-mode grammar provides very basic, low level nodes that try to match Clojure's very light syntax. +`clojure-ts-mode` uses the experimental version of the tree-sitter-clojure grammar, which +can be found at +. The +`clojure-ts-mode` grammar provides very basic, low level nodes that try to match +Clojure's very light syntax. There are nodes to represent: -- Symbols (sym_lit) - - Contain (sym_ns) and (sym_name) nodes -- Keywords (kwd_lit) - - Contain (kwd_ns) and (kw_name) nodes -- Strings (str_lit) -- Chars (char_lit) -- Nil (nil_lit) -- Booleans (bool_lit) -- Numbers (num_lit) -- Comments (comment, dis_expr) - - dis_expr is the `#_` discard expression -- Lists (list_list) -- Vectors (vec_lit) -- Maps (map_lit) -- Sets (set_lit) - -There are also nodes to represent metadata, which appear on `meta:` child fields of the nodes the metadata is defined on. 
-For example a simple vector with metadata defined on it like so +- Symbols `(sym_lit)` + - Contain `(sym_ns)` and `(sym_name)` nodes +- Keywords `(kwd_lit)` + - Contain `(kwd_ns)` and `(kwd_name)` nodes +- Strings `(str_lit)` + - Contains `(str_content)` node +- Chars `(char_lit)` +- Nil `(nil_lit)` +- Booleans `(bool_lit)` +- Numbers `(num_lit)` +- Comments `(comment, dis_expr)` + - `dis_expr` is the `#_` discard expression +- Lists `(list_lit)` +- Vectors `(vec_lit)` +- Maps `(map_lit)` +- Sets `(set_lit)` +- Metadata nodes `(meta_lit)` +- Regex content `(regex_content)` +- Function literals `(anon_fn_lit)` + +The best place to learn more about the tree-sitter-clojure grammar is to read +the [grammar.js file from the tree-sitter-clojure repository](https://github.com/sogaiu/tree-sitter-clojure/blob/master/grammar.js "grammar.js"). + +### Difference between stable grammar and experimental + +#### Standalone metadata nodes + +Metadata nodes in stable grammar appear as child nodes of the nodes the metadata +is defined on. For example, a simple vector with metadata defined on it like so: ```clojure ^:has-metadata [1] @@ -69,7 +83,28 @@ will produce a parse tree like so value: (num_lit)) ``` -The best place to learn more about the tree-sitter-clojure grammar is to read the [grammar.js file from the tree-sitter-clojure repository](https://github.com/sogaiu/tree-sitter-clojure/blob/master/grammar.js "grammar.js"). +Although it's somewhat closer to how Clojure treats metadata itself, in the +context of a text editor it creates some problems, which were discussed [here](https://github.com/sogaiu/tree-sitter-clojure/issues/65). To +name a few: + +- `forward-sexp` command would skip both the metadata and the node it's attached + to. Called from an opening paren it would signal an error "No more sexp to + move across". +- `kill-sexp` command would kill both the metadata and the node it's attached to. 
+- `backward-up-list` called from the inside of a list with metadata would move + point to the beginning of metadata node. +- Internally we had to introduce some workarounds to skip metadata nodes or + figure out where the actual node starts. + +#### Special nodes for string content and regex content + +To parse the content of certain strings with a separate grammar, it is necessary +to extract the string's content, excluding its opening and closing quotes. To +achieve this, Emacs 31 allows specifying offsets for `treesit-range-settings`. +However, in Emacs 30.1, this feature is broken due to bug [#77848](https://debbugs.gnu.org/cgi/bugreport.cgi?bug=77848) (a fix is +anticipated in Emacs 30.2). The presence of `str_content` and `regex_content` nodes +allows us to support this feature across all Emacs versions without relying on +offsets. ### Clojure Syntax, not Clojure Semantics @@ -148,12 +183,132 @@ changes in the grammar. ## Syntax Highlighting -TODO +To set up Tree-sitter fontification, `clojure-ts-mode` sets the +`treesit-font-lock-settings` variable with the output of +`clojure-ts--font-lock-settings`, and then calls `treesit-major-mode-setup`. -## Indentation +`clojure-ts--font-lock-settings` returns a list of compiled queries. Each query +must have at least one capture name (names that start with `@`). If a capture +name matches an existing face name (e.g., `font-lock-keyword-face`), the +captured node will be fontified with that face. + +A capture name can also be arbitrary and used to check the text of the captured +node. It can also be used for both fontification and text checking. 
For +example in the following query: + +```emacs-lisp +`((list_lit :anchor [(comment) (meta_lit) (old_meta_lit)] :* + :anchor (sym_lit !namespace name: (sym_name) @font-lock-keyword-face)) + (:match ,clojure-ts--builtin-symbol-regexp @font-lock-keyword-face)) +``` + +We match any list whose first symbol (skipping any number of comments and +metadata nodes) does not have a namespace and matches a regex stored in the +`clojure-ts--builtin-symbol-regexp` variable. The matched symbol is fontified +using `font-lock-keyword-face`. + +### Embedded parsers + +The Clojure grammar in `clojure-ts-mode` is a main or "host" grammar. Emacs +also supports the use of any number of "embedded" grammars. `clojure-ts-mode` +currently uses the `markdown-inline` grammar to highlight Markdown constructs in +docstrings and the `regex` grammar to highlight regular expression syntax. + +To use an embedded parser, `clojure-ts-mode` must set an appropriate value for +the `treesit-range-settings` variable. The Clojure grammar provides convenient +nodes to capture only the content of strings and regexes, which makes defining +range settings for regexes quite simple: + +```emacs-lisp +(treesit-range-rules + :embed 'regex + :host 'clojure + :local t + '((regex_content) @capture)) +``` + +For docstrings, the query is a bit more complex. Therefore, we have the +function `clojure-ts--docstring-query`, which is used for syntax highlighting, +indentation rules, and range settings for the embedded Markdown parser: + +```emacs-lisp +(treesit-range-rules + :embed 'markdown-inline + :host 'clojure + :local t + (clojure-ts--docstring-query '@capture)) + ``` -TODO +It is important to use the `:local` option for embedded parsers; otherwise, the +range will not be restricted to the captured node, which will lead to broken +fontification (see bug [#77733](https://debbugs.gnu.org/cgi/bugreport.cgi?bug=77733)). 
-## Semantic Interpretation in clojure-ts-mode +### Additional information -TODO: demonstrate how clojure-ts-mode creates semantic meaning from a given syntax tree. Show examples of how new semantic meaning can be added (with highlighting, indentation, etc). +To find more details one can evaluate the following expression in Emacs: + +```emacs-lisp +(info "(elisp) Parser-based Font Lock") +``` + +## Indentation + +To enable the parser-based indentation engine, `clojure-ts-mode` sets the +`treesit-simple-indent-rules` with the output of +`clojure-ts--configured-indent-rules`, and then calls `treesit-major-mode-setup`. + +According to the documentation of the `treesit-simple-indent-rules` variable, its +value is: + +> A list of indent rule settings. +> Each indent rule setting should be (LANGUAGE RULE...), where LANGUAGE is +> a language symbol, and each RULE is of the form +> +> (MATCHER ANCHOR OFFSET) +> +> MATCHER determines whether this rule applies, ANCHOR and +> OFFSET together determines which column to indent to. + +For example, a rule like this: + +```emacs-lisp +'((clojure + ((parent-is "^vec_lit$") parent 1) + ((parent-is "^map_lit$") parent 1) + ((parent-is "^set_lit$") parent 2))) +``` + +will indent any node whose parent node is a `vec_lit` or `map_lit` with 1 space, +starting from the beginning of the parent node. For `set_lit`, it will add two +spaces because sets have two opening characters: `#` and `{`. + +In the example above, the `parent-is` matcher and `parent` anchor are built-in +presets. There are many predefined presets provided by Emacs. The list of all +available presets can be found in the documentation for the +`treesit-simple-indent-presets` variable. + +Sometimes, more complex behavior than predefined built-in presets is required. +In such cases, you can write your own matchers and anchors. One good example is +the `clojure-ts--match-form-body` matcher. 
It attempts to match a node at point +using the combined value of `clojure-ts--semantic-indent-rules-defaults` and +`clojure-ts-semantic-indent-rules`. These rules have a similar format to cljfmt +indentation rules. `clojure-ts-semantic-indent-rules` is a customization option +that users can tweak. `clojure-ts--match-form-body` traverses the syntax tree, +starting from the node at point, towards the top of the tree in order to find a +match. In addition to `clojure-ts--semantic-indent-rules-defaults` and +`clojure-ts-semantic-indent-rules`, it may also use `clojure-ts-get-indent-function` +if it is not `nil`. This function provides an API for dynamic indentation and +must return a value compatible with `cider-nrepl`. Searching for an indentation +rule across all these variables is slow; therefore, +`clojure-ts--semantic-indent-rules-cache` was introduced. It is set when +`clojure-ts-mode` is activated in a Clojure source buffer and refreshed every time +`clojure-ts-semantic-indent-rules` is updated (using setopt or the customization +interface) or when a `.dir-locals.el` file is updated. + +### Additional information + +To find more details one can evaluate the following expression in Emacs: + +```emacs-lisp +(info "(elisp) Parser-based Indentation") +``` diff --git a/test/clojure-ts-mode-completion.el b/test/clojure-ts-mode-completion.el new file mode 100644 index 0000000..1bc92ce --- /dev/null +++ b/test/clojure-ts-mode-completion.el @@ -0,0 +1,153 @@ +;;; clojure-ts-mode-completion.el --- clojure-ts-mode: completion tests -*- lexical-binding: t; -*- + +;; Copyright (C) 2025 Roman Rudakov + +;; Author: Roman Rudakov +;; Keywords: + +;; This program is free software; you can redistribute it and/or modify +;; it under the terms of the GNU General Public License as published by +;; the Free Software Foundation, either version 3 of the License, or +;; (at your option) any later version. 
+ +;; This program is distributed in the hope that it will be useful, +;; but WITHOUT ANY WARRANTY; without even the implied warranty of +;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +;; GNU General Public License for more details. + +;; You should have received a copy of the GNU General Public License +;; along with this program. If not, see <https://www.gnu.org/licenses/>. + +;;; Commentary: + +;; Completion is a unique `clojure-ts-mode' feature. + +;;; Code: + +(require 'clojure-ts-mode) +(require 'buttercup) +(require 'test-helper "test/test-helper") + +(describe "clojure-ts-complete-at-point-function" + ;; NOTE: This function returns unfiltered candidates, so prefix doesn't really + ;; matter here. + + (it "should complete global vars" + (with-clojure-ts-buffer-point " +(def foo :first) + +(def bar :second) + +(defn baz + [] + (println foo bar)) + +b|" + (expect (nth 2 (clojure-ts-completion-at-point-function)) + :to-equal '(("foo" . defun-candidate) + ("bar" . defun-candidate) + ("baz" . defun-candidate))))) + + (it "should complete function arguments" + (with-clojure-ts-buffer-point " +(def foo :first) + +(def bar :second) + +(defn baz + [username] + (println u|))" + (expect (nth 2 (clojure-ts-completion-at-point-function)) + :to-equal '(("foo" . defun-candidate) + ("bar" . defun-candidate) + ("baz" . defun-candidate) + ("username" . local-candidate))))) + + (it "should not complete function arguments outside of function" + (with-clojure-ts-buffer-point " +(def foo :first) + +(def bar :second) + +(defn baz + [username] + (println bar)) + +u|" + (expect (nth 2 (clojure-ts-completion-at-point-function)) + :to-equal '(("foo" . defun-candidate) + ("bar" . defun-candidate) + ("baz" . defun-candidate))))) + + (it "should complete destructured function arguments" + (with-clojure-ts-buffer-point " +(defn baz + [{:keys [username]}] + (println u|))" + (expect (nth 2 (clojure-ts-completion-at-point-function)) + :to-equal '(("baz" . defun-candidate) + ("username" . 
local-candidate)))) + + (with-clojure-ts-buffer-point " +(defn baz + [{:strs [username]}] + (println u|))" + (expect (nth 2 (clojure-ts-completion-at-point-function)) + :to-equal '(("baz" . defun-candidate) + ("username" . local-candidate)))) + + (with-clojure-ts-buffer-point " +(defn baz + [{:syms [username]}] + (println u|))" + (expect (nth 2 (clojure-ts-completion-at-point-function)) + :to-equal '(("baz" . defun-candidate) + ("username" . local-candidate)))) + + (with-clojure-ts-buffer-point " +(defn baz + [{username :name}] + (println u|))" + (expect (nth 2 (clojure-ts-completion-at-point-function)) + :to-equal '(("baz" . defun-candidate) + ("username" . local-candidate)))) + + (with-clojure-ts-buffer-point " +(defn baz + [[first-name last-name]] + (println f|))" + (expect (nth 2 (clojure-ts-completion-at-point-function)) + :to-equal '(("baz" . defun-candidate) + ("first-name" . local-candidate) + ("last-name" . local-candidate))))) + + (it "should complete vector bindings" + (with-clojure-ts-buffer-point " +(defn baz + [first-name] + (let [last-name \"Doe\" + address {:street \"Whatever\" :zip-code 2222} + {:keys [street zip-code]} address] + a|))" + (expect (nth 2 (clojure-ts-completion-at-point-function)) + :to-equal '(("baz" . defun-candidate) + ("first-name" . local-candidate) + ("last-name" . local-candidate) + ("address" . local-candidate) + ("street" . local-candidate) + ("zip-code" . local-candidate))))) + + (it "should not complete called function names" + (with-clojure-ts-buffer-point " +(defn baz + [first-name] + (let [full-name (str first-name \"Doe\")] + s|))" + ;; `str' should not be among the candidates. + (expect (nth 2 (clojure-ts-completion-at-point-function)) + :to-equal '(("baz" . defun-candidate) + ("first-name" . local-candidate) + ("full-name" . 
local-candidate)))))) + +(provide 'clojure-ts-mode-completion) +;;; clojure-ts-mode-completion.el ends here diff --git a/test/clojure-ts-mode-font-lock-test.el b/test/clojure-ts-mode-font-lock-test.el index 8611211..4770ccf 100644 --- a/test/clojure-ts-mode-font-lock-test.el +++ b/test/clojure-ts-mode-font-lock-test.el @@ -223,4 +223,29 @@ DESCRIPTION is the description of the spec." (2 12 font-lock-keyword-face) (14 14 font-lock-type-face) (19 21 font-lock-function-name-face) - (34 39 font-lock-function-name-face)))) + (34 39 font-lock-function-name-face)) + + ("(extend-protocol prepare/SettableParameter + clojure.lang.IPersistentMap + (set-parameter [m ^PreparedStatement s i] + (.setObject s i (->pgobject m))))" + (81 93 font-lock-function-name-face)))) + +;;;; Extra def forms + +(describe "clojure-ts-extra-def-forms" + (it "should respect the value of clojure-ts-extra-def-forms" + (with-clojure-ts-buffer "(defelem file-upload + \"Creates a file upload input.\" + [name] + (input-field \"file\" name nil))" + (setopt clojure-ts-extra-def-forms '("defelem")) + (clojure-ts-mode) + (font-lock-ensure) + (goto-char (point-min)) + (expect (get-text-property 2 'face) + :to-equal 'font-lock-keyword-face) + (expect (get-text-property 10 'face) + :to-equal 'font-lock-function-name-face) + (expect (get-text-property 25 'face) + :to-equal 'font-lock-doc-face)))) diff --git a/test/clojure-ts-mode-indentation-test.el b/test/clojure-ts-mode-indentation-test.el index bda3538..d158ed8 100644 --- a/test/clojure-ts-mode-indentation-test.el +++ b/test/clojure-ts-mode-indentation-test.el @@ -124,7 +124,7 @@ DESCRIPTION is a string with the description of the spec." ;; Mock `cider--get-symbol-indent' function (defun cider--get-symbol-indent-mock (symbol-name) - "Returns static mocked indentation specs for SYMBOL-NAME if available." + "Return static mocked indentation specs for SYMBOL-NAME if available." 
(when (stringp symbol-name) (cond ((string-equal symbol-name "my-with-in-str") 1) diff --git a/test/clojure-ts-mode-refactor-add-arity-test.el b/test/clojure-ts-mode-refactor-add-arity-test.el index 9c31f27..f119607 100644 --- a/test/clojure-ts-mode-refactor-add-arity-test.el +++ b/test/clojure-ts-mode-refactor-add-arity-test.el @@ -324,6 +324,20 @@ (clojure-ts-add-arity)) + (when-refactoring-with-point-it "should handle an extend-protocol" + "(extend-protocol prepare/SettableParameter + clojure.lang.IPersistentMap + (set-parameter [m ^PreparedStatement s i] + (.setObject| s i (->pgobject m))))" + + "(extend-protocol prepare/SettableParameter + clojure.lang.IPersistentMap + (set-parameter [|]) + (set-parameter [m ^PreparedStatement s i] + (.setObject s i (->pgobject m))))" + + (clojure-ts-add-arity)) + (it "should signal a user error when point is not inside a function body" (with-clojure-ts-buffer-point " (letf|n [(foo diff --git a/test/clojure-ts-mode-refactor-threading-test.el b/test/clojure-ts-mode-refactor-threading-test.el index ce26d5d..35e1ebb 100644 --- a/test/clojure-ts-mode-refactor-threading-test.el +++ b/test/clojure-ts-mode-refactor-threading-test.el @@ -205,6 +205,13 @@ (clojure-ts-unwind) (clojure-ts-unwind)) + (when-refactoring-it "should work correctly when there is only one expression" + "(->> (filter even? [1 2 3 4]))" + + "(filter even? [1 2 3 4])" + + (clojure-ts-unwind)) + (when-refactoring-it "should unwind N steps with numeric prefix arg" "(->> [1 2 3 4 5] (filter even?) diff --git a/test/samples/completion.clj b/test/samples/completion.clj new file mode 100644 index 0000000..16b64de --- /dev/null +++ b/test/samples/completion.clj @@ -0,0 +1,56 @@ +(ns completion) + +(def my-var "Hello") +(def my-another-var "World") + +(defn- my-function + "This is a docstring." 
+ [some-arg] + (let [to-print (str "Hello" some-arg)] + (println my-var my-another-var to-print))) + +(fn [anon-arg] + anon-arg) + +(def hello-string "Hello") + +(defn complete-example + "Docstring won't interfere with completion." + [arg1 arg2 & {:keys [destructured]}] + ;; Here only function args and globals should be completed. + (println arg1 arg2 destructured) + (let [foo "bar" ; comment + baz ^String hello + map-var {:users/usename "Roma"} + {:users/keys [username]} map-var + another-map {:address "Universe"} + {custom-address :address} another-map + bar :kwd] + ;; Here let bindings are available in addition to globals and function args. + (println arg1 foo map-var custom-address username) + (when-let [nested-var "Whatever"] + (with-open [output-stream (io/output-stream "some-file")] + (println foo + baz + hello + map-var + username + another-map + custom-address + bar) + ;; Here we should see everything + (output-stream nested-var output-stream another-map))) + ;; And here only let bindings, globals and function args again. + (println username))) + +(def vec-variable ["one" "two" "three"]) + +(let [[one two three] vec-variable] + (println one two three)) + +(defn nested-fn + [top-arg] + (filter (fn [item] + ;; Both arguments are available here. 
+ (= item top-arg)) + [1 2 3 4 5])) diff --git a/test/samples/embed.cljs b/test/samples/embed.cljs new file mode 100644 index 0000000..22000a7 --- /dev/null +++ b/test/samples/embed.cljs @@ -0,0 +1,12 @@ +(ns embed) + +(js* "var hello = console.log('hello'); const now = new Date();") + +(js* "const hello = new Date(); + const someOtherVar = 'Just a string';") + +(println "This is a normal string") + +"Standalone string" + +(js* "var hello = 'world';") diff --git a/test/samples/indentation.clj b/test/samples/indentation.clj index 132a5f2..52b417e 100644 --- a/test/samples/indentation.clj +++ b/test/samples/indentation.clj @@ -228,15 +228,24 @@ :foo "bar"} -;; NOTE: List elements with metadata are not indented correctly. '(one two ^:foo - three) + three) ^{:nextjournal.clerk/visibility {:code :hide}} (defn actual [args]) +(println "Hello" + "World") + +#(println + "hello" + %) + +#(println "hello" + %) + (def ^:private hello "World") diff --git a/test/samples/native.jank b/test/samples/native.jank index bf07596..1eb03c7 100644 --- a/test/samples/native.jank +++ b/test/samples/native.jank @@ -4,7 +4,11 @@ (defn set-shader-source! [shader source] (native/raw "auto const shader(detail::to_int(~{ shader })); auto const &source(detail::to_string(~{ source })); + __value = make_box(); __value = make_box(glShaderSource(shader, 1, &source.data, nullptr));")) (defn compile-shader! 
[shader] - (native/raw "__value = make_box(glCompileShader(detail::to_int(~{ shader })));")) + (native/raw "__value = make_box(glCompileShader(detail::to_int(~{ shader })));") + "Normal string") + +"Normal string" diff --git a/test/samples/navigation.clj b/test/samples/navigation.clj new file mode 100644 index 0000000..26bdf44 --- /dev/null +++ b/test/samples/navigation.clj @@ -0,0 +1,14 @@ +(ns navigation) + +(let [my-var ^{:foo "bar"} (= "Hello" "Hello")]) + +(let [my-var ^boolean (= "Hello" "world")]) + +#(+ % %) + +^boolean (= 2 2) + +(defn- to-string + ^String + [arg] + (.toString arg)) diff --git a/test/samples/refactoring.clj b/test/samples/refactoring.clj index 10f12b5..5a87bf7 100644 --- a/test/samples/refactoring.clj +++ b/test/samples/refactoring.clj @@ -141,3 +141,9 @@ (when-not true (println "Hello world")) + +(extend-protocol prepare/SettableParameter + clojure.lang.IPersistentMap + (set-parameter []) + (set-parameter [m ^PreparedStatement s i] + (.setObject| s i (->pgobject m)))) diff --git a/test/test-helper.el b/test/test-helper.el index fa821e6..f1515d9 100644 --- a/test/test-helper.el +++ b/test/test-helper.el @@ -48,7 +48,7 @@ and point left there." ,@body))) (defun clojure-ts--s-index-of (needle s &optional ignore-case) - "Returns first index of NEEDLE in S, or nil. + "Return first index of NEEDLE in S, or nil. If IGNORE-CASE is non-nil, the comparison is done without paying attention to case differences."