From 525ba527bb3870e07617e7388bed8750820cb185 Mon Sep 17 00:00:00 2001 From: Maciej Obuchowski Date: Fri, 29 Apr 2022 20:11:11 +0200 Subject: [PATCH 01/33] snowflake: add qualify expression (#465) Signed-off-by: Maciej Obuchowski --- src/ast/query.rs | 5 +++++ src/keywords.rs | 2 ++ src/parser.rs | 7 ++++++ tests/sqlparser_common.rs | 42 ++++++++++++++++++++++++++++++++++- tests/sqlparser_mysql.rs | 2 ++ tests/sqlparser_postgres.rs | 1 + tests/sqpparser_clickhouse.rs | 3 ++- 7 files changed, 60 insertions(+), 2 deletions(-) diff --git a/src/ast/query.rs b/src/ast/query.rs index 82341f632..7526c23cc 100644 --- a/src/ast/query.rs +++ b/src/ast/query.rs @@ -154,6 +154,8 @@ pub struct Select { pub sort_by: Vec, /// HAVING pub having: Option, + /// QUALIFY (Snowflake) + pub qualify: Option, } impl fmt::Display for Select { @@ -202,6 +204,9 @@ impl fmt::Display for Select { if let Some(ref having) = self.having { write!(f, " HAVING {}", having)?; } + if let Some(ref qualify) = self.qualify { + write!(f, " QUALIFY {}", qualify)?; + } Ok(()) } } diff --git a/src/keywords.rs b/src/keywords.rs index ad31bda5c..ccb4f3f93 100644 --- a/src/keywords.rs +++ b/src/keywords.rs @@ -382,6 +382,7 @@ define_keywords!( PROCEDURE, PROGRAM, PURGE, + QUALIFY, QUARTER, QUOTE, RANGE, @@ -584,6 +585,7 @@ pub const RESERVED_FOR_TABLE_ALIAS: &[Keyword] = &[ // for MSSQL-specific OUTER APPLY (seems reserved in most dialects) Keyword::OUTER, Keyword::SET, + Keyword::QUALIFY, ]; /// Can't be used as a column alias, so that `SELECT alias` diff --git a/src/parser.rs b/src/parser.rs index 1ca493b98..e855bdf4a 100644 --- a/src/parser.rs +++ b/src/parser.rs @@ -3167,6 +3167,12 @@ impl<'a> Parser<'a> { None }; + let qualify = if self.parse_keyword(Keyword::QUALIFY) { + Some(self.parse_expr()?) 
+ } else { + None + }; + Ok(Select { distinct, top, @@ -3180,6 +3186,7 @@ impl<'a> Parser<'a> { distribute_by, sort_by, having, + qualify, }) } diff --git a/tests/sqlparser_common.rs b/tests/sqlparser_common.rs index 8471b2c5f..e0c7f34b2 100644 --- a/tests/sqlparser_common.rs +++ b/tests/sqlparser_common.rs @@ -1322,6 +1322,45 @@ fn parse_select_having() { assert!(select.having.is_some()); } +#[cfg(feature = "bigdecimal")] +#[test] +fn parse_select_qualify() { + let sql = "SELECT i, p, o FROM qt QUALIFY ROW_NUMBER() OVER (PARTITION BY p ORDER BY o) = 1"; + let select = verified_only_select(sql); + assert_eq!( + Some(Expr::BinaryOp { + left: Box::new(Expr::Function(Function { + name: ObjectName(vec![Ident::new("ROW_NUMBER")]), + args: vec![], + over: Some(WindowSpec { + partition_by: vec![Expr::Identifier(Ident::new("p"))], + order_by: vec![OrderByExpr { + expr: Expr::Identifier(Ident::new("o")), + asc: None, + nulls_first: None + }], + window_frame: None + }), + distinct: false + })), + op: BinaryOperator::Eq, + right: Box::new(Expr::Value(number("1"))) + }), + select.qualify + ); + + let sql = "SELECT i, p, o, ROW_NUMBER() OVER (PARTITION BY p ORDER BY o) AS row_num FROM qt QUALIFY row_num = 1"; + let select = verified_only_select(sql); + assert_eq!( + Some(Expr::BinaryOp { + left: Box::new(Expr::Identifier(Ident::new("row_num"))), + op: BinaryOperator::Eq, + right: Box::new(Expr::Value(number("1"))) + }), + select.qualify + ); +} + #[test] fn parse_limit_accepts_all() { one_statement_parses_to( @@ -4336,7 +4375,8 @@ fn parse_merge() { cluster_by: vec![], distribute_by: vec![], sort_by: vec![], - having: None + having: None, + qualify: None })), order_by: vec![], limit: None, diff --git a/tests/sqlparser_mysql.rs b/tests/sqlparser_mysql.rs index e3566bca9..454c6639e 100644 --- a/tests/sqlparser_mysql.rs +++ b/tests/sqlparser_mysql.rs @@ -314,6 +314,7 @@ fn parse_quote_identifiers_2() { distribute_by: vec![], sort_by: vec![], having: None, + qualify: None })), 
order_by: vec![], limit: None, @@ -754,6 +755,7 @@ fn parse_substring_in_select() { distribute_by: vec![], sort_by: vec![], having: None, + qualify: None })), order_by: vec![], limit: None, diff --git a/tests/sqlparser_postgres.rs b/tests/sqlparser_postgres.rs index eb42edc8d..125ce9f6e 100644 --- a/tests/sqlparser_postgres.rs +++ b/tests/sqlparser_postgres.rs @@ -450,6 +450,7 @@ fn parse_update_set_from() { distribute_by: vec![], sort_by: vec![], having: None, + qualify: None })), order_by: vec![], limit: None, diff --git a/tests/sqpparser_clickhouse.rs b/tests/sqpparser_clickhouse.rs index dfd555200..59fc91c88 100644 --- a/tests/sqpparser_clickhouse.rs +++ b/tests/sqpparser_clickhouse.rs @@ -96,7 +96,8 @@ fn parse_map_access_expr() { cluster_by: vec![], distribute_by: vec![], sort_by: vec![], - having: None + having: None, + qualify: None }, select ); From 7732c34b195e5fffeeb8d1fb145b1aad082c345a Mon Sep 17 00:00:00 2001 From: yuval-illumex <85674443+yuval-illumex@users.noreply.github.com> Date: Mon, 2 May 2022 21:02:28 +0300 Subject: [PATCH 02/33] Add support in IS boolean filter (#474) * Add support in IS TRUE IS FALSE * Fix lint * Add test for is false --- src/parser.rs | 14 +++++++++++++- tests/sqlparser_common.rs | 22 ++++++++++++++++++++++ 2 files changed, 35 insertions(+), 1 deletion(-) diff --git a/src/parser.rs b/src/parser.rs index e855bdf4a..5ab680069 100644 --- a/src/parser.rs +++ b/src/parser.rs @@ -1130,9 +1130,21 @@ impl<'a> Parser<'a> { { let expr2 = self.parse_expr()?; Ok(Expr::IsNotDistinctFrom(Box::new(expr), Box::new(expr2))) + } else if let Some(right) = + self.parse_one_of_keywords(&[Keyword::TRUE, Keyword::FALSE]) + { + let mut val = Value::Boolean(true); + if right == Keyword::FALSE { + val = Value::Boolean(false); + } + Ok(Expr::BinaryOp { + left: Box::new(expr), + op: BinaryOperator::Eq, + right: Box::new(Expr::Value(val)), + }) } else { self.expected( - "[NOT] NULL or [NOT] DISTINCT FROM after IS", + "[NOT] NULL or [NOT] DISTINCT FROM 
TRUE FALSE after IS", self.peek_token(), ) } diff --git a/tests/sqlparser_common.rs b/tests/sqlparser_common.rs index e0c7f34b2..43db2f769 100644 --- a/tests/sqlparser_common.rs +++ b/tests/sqlparser_common.rs @@ -4658,3 +4658,25 @@ fn parse_position_negative() { res.unwrap_err() ); } + +#[test] +fn parse_is_boolean() { + one_statement_parses_to( + "SELECT f from foo where field is true", + "SELECT f FROM foo WHERE field = true", + ); + + one_statement_parses_to( + "SELECT f from foo where field is false", + "SELECT f FROM foo WHERE field = false", + ); + + let sql = "SELECT f from foo where field is 0"; + let res = parse_sql_statements(sql); + assert_eq!( + ParserError::ParserError( + "Expected [NOT] NULL or [NOT] DISTINCT FROM TRUE FALSE after IS, found: 0".to_string() + ), + res.unwrap_err() + ); +} From f5980cd30fa115229dc044485ea93c76248177c5 Mon Sep 17 00:00:00 2001 From: Dmitry Patsura Date: Mon, 2 May 2022 21:04:33 +0300 Subject: [PATCH 03/33] feat: Support KILL statement (#479) --- src/ast/mod.rs | 36 +++++++++++++++++++++++++++++++++++ src/keywords.rs | 4 ++++ src/parser.rs | 27 ++++++++++++++++++++++++++ tests/sqlparser_mysql.rs | 30 +++++++++++++++++++++++++++++ tests/sqpparser_clickhouse.rs | 12 ++++++++++++ 5 files changed, 109 insertions(+) diff --git a/src/ast/mod.rs b/src/ast/mod.rs index 89e135756..f3e96d0c5 100644 --- a/src/ast/mod.rs +++ b/src/ast/mod.rs @@ -1005,6 +1005,13 @@ pub enum Statement { data_types: Vec, statement: Box, }, + /// See + /// See + Kill { + modifier: Option, + // processlist_id + id: u64, + }, /// EXPLAIN TABLE /// Note: this is a MySQL-specific statement. 
See ExplainTable { @@ -1047,6 +1054,15 @@ impl fmt::Display for Statement { #[allow(clippy::cognitive_complexity)] fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { match self { + Statement::Kill { modifier, id } => { + write!(f, "KILL ")?; + + if let Some(m) = modifier { + write!(f, "{} ", m)?; + } + + write!(f, "{}", id) + } Statement::ExplainTable { describe_alias, table_name, @@ -2097,6 +2113,26 @@ impl fmt::Display for ObjectType { } } +#[derive(Debug, Clone, PartialEq, Eq, Hash)] +#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] +pub enum KillType { + Connection, + Query, + Mutation, +} + +impl fmt::Display for KillType { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + f.write_str(match self { + // MySQL + KillType::Connection => "CONNECTION", + KillType::Query => "QUERY", + // Clickhouse supports Mutation + KillType::Mutation => "MUTATION", + }) + } +} + #[derive(Debug, Clone, PartialEq, Eq, Hash)] #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] pub enum HiveDistributionStyle { diff --git a/src/keywords.rs b/src/keywords.rs index ccb4f3f93..709b60bf8 100644 --- a/src/keywords.rs +++ b/src/keywords.rs @@ -136,6 +136,7 @@ define_keywords!( COMPUTE, CONDITION, CONNECT, + CONNECTION, CONSTRAINT, CONTAINS, CONVERT, @@ -279,6 +280,7 @@ define_keywords!( JSONFILE, JULIAN, KEY, + KILL, LAG, LANGUAGE, LARGE, @@ -319,6 +321,7 @@ define_keywords!( MONTH, MSCK, MULTISET, + MUTATION, NATIONAL, NATURAL, NCHAR, @@ -384,6 +387,7 @@ define_keywords!( PURGE, QUALIFY, QUARTER, + QUERY, QUOTE, RANGE, RANK, diff --git a/src/parser.rs b/src/parser.rs index 5ab680069..01b021a0b 100644 --- a/src/parser.rs +++ b/src/parser.rs @@ -154,6 +154,7 @@ impl<'a> Parser<'a> { pub fn parse_statement(&mut self) -> Result { match self.next_token() { Token::Word(w) => match w.keyword { + Keyword::KILL => Ok(self.parse_kill()?), Keyword::DESCRIBE => Ok(self.parse_explain(true)?), Keyword::EXPLAIN => Ok(self.parse_explain(false)?), Keyword::ANALYZE => 
Ok(self.parse_analyze()?), @@ -2878,6 +2879,32 @@ impl<'a> Parser<'a> { }) } + // KILL [CONNECTION | QUERY] processlist_id + pub fn parse_kill(&mut self) -> Result { + let modifier_keyword = + self.parse_one_of_keywords(&[Keyword::CONNECTION, Keyword::QUERY, Keyword::MUTATION]); + + let id = self.parse_literal_uint()?; + + let modifier = match modifier_keyword { + Some(Keyword::CONNECTION) => Some(KillType::Connection), + Some(Keyword::QUERY) => Some(KillType::Query), + Some(Keyword::MUTATION) => { + if dialect_of!(self is ClickHouseDialect | GenericDialect) { + Some(KillType::Mutation) + } else { + self.expected( + "Unsupported type for KILL, allowed: CONNECTION | QUERY", + self.peek_token(), + )? + } + } + _ => None, + }; + + Ok(Statement::Kill { modifier, id }) + } + pub fn parse_explain(&mut self, describe_alias: bool) -> Result { let analyze = self.parse_keyword(Keyword::ANALYZE); let verbose = self.parse_keyword(Keyword::VERBOSE); diff --git a/tests/sqlparser_mysql.rs b/tests/sqlparser_mysql.rs index 454c6639e..ac265875d 100644 --- a/tests/sqlparser_mysql.rs +++ b/tests/sqlparser_mysql.rs @@ -770,6 +770,36 @@ fn parse_substring_in_select() { } } +#[test] +fn parse_kill() { + let stmt = mysql_and_generic().verified_stmt("KILL CONNECTION 5"); + assert_eq!( + stmt, + Statement::Kill { + modifier: Some(KillType::Connection), + id: 5, + } + ); + + let stmt = mysql_and_generic().verified_stmt("KILL QUERY 5"); + assert_eq!( + stmt, + Statement::Kill { + modifier: Some(KillType::Query), + id: 5, + } + ); + + let stmt = mysql_and_generic().verified_stmt("KILL 5"); + assert_eq!( + stmt, + Statement::Kill { + modifier: None, + id: 5, + } + ); +} + fn mysql() -> TestedDialects { TestedDialects { dialects: vec![Box::new(MySqlDialect {})], diff --git a/tests/sqpparser_clickhouse.rs b/tests/sqpparser_clickhouse.rs index 59fc91c88..ab8c28f21 100644 --- a/tests/sqpparser_clickhouse.rs +++ b/tests/sqpparser_clickhouse.rs @@ -119,6 +119,18 @@ fn parse_array_expr() { ) } 
+#[test] +fn parse_kill() { + let stmt = clickhouse().verified_stmt("KILL MUTATION 5"); + assert_eq!( + stmt, + Statement::Kill { + modifier: Some(KillType::Mutation), + id: 5, + } + ); +} + fn clickhouse() -> TestedDialects { TestedDialects { dialects: vec![Box::new(ClickHouseDialect {})], From a9d7f7af1f746046d3940c649f21d55ef22c950b Mon Sep 17 00:00:00 2001 From: Andrew Lamb Date: Mon, 2 May 2022 17:49:06 -0400 Subject: [PATCH 04/33] Improve docs for KILL statement (#481) --- src/ast/mod.rs | 4 +++- src/parser.rs | 2 +- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/src/ast/mod.rs b/src/ast/mod.rs index f3e96d0c5..a052cd976 100644 --- a/src/ast/mod.rs +++ b/src/ast/mod.rs @@ -1005,6 +1005,8 @@ pub enum Statement { data_types: Vec, statement: Box, }, + /// KILL [CONNECTION | QUERY | MUTATION] + /// /// See /// See Kill { @@ -2464,7 +2466,7 @@ impl fmt::Display for CopyLegacyCsvOption { } } -/// +/// #[derive(Debug, Clone, PartialEq, Eq, Hash)] #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] pub enum MergeClause { diff --git a/src/parser.rs b/src/parser.rs index 01b021a0b..203b7e3f8 100644 --- a/src/parser.rs +++ b/src/parser.rs @@ -2879,7 +2879,7 @@ impl<'a> Parser<'a> { }) } - // KILL [CONNECTION | QUERY] processlist_id + // KILL [CONNECTION | QUERY | MUTATION] processlist_id pub fn parse_kill(&mut self) -> Result { let modifier_keyword = self.parse_one_of_keywords(&[Keyword::CONNECTION, Keyword::QUERY, Keyword::MUTATION]); From 7fc6361fe84a7932957f2398b6f2b206845349f9 Mon Sep 17 00:00:00 2001 From: Maciej Skrzypkowski Date: Wed, 4 May 2022 17:11:00 +0200 Subject: [PATCH 05/33] Add Redshift dialect, handle square brackets properly (#471) * Redshift square bracket handling We need to detect `[` or `"` for Redshift quotes around indentifier and at the same time exclude treating JSON paths as indentifer * RedshiftSqlDialect documentation update Co-authored-by: Andrew Lamb * Renamed _chars to chars * Fixed warnings * Missing license 
Co-authored-by: Maciej Skrzypkowski Co-authored-by: Andrew Lamb --- examples/cli.rs | 1 + src/ast/ddl.rs | 2 +- src/dialect/mod.rs | 8 +++ src/dialect/redshift.rs | 53 +++++++++++++++++++ src/test_utils.rs | 1 + src/tokenizer.rs | 7 ++- tests/sqlparser_postgres.rs | 2 +- tests/sqlparser_redshift.rs | 102 ++++++++++++++++++++++++++++++++++++ 8 files changed, 173 insertions(+), 3 deletions(-) create mode 100644 src/dialect/redshift.rs create mode 100644 tests/sqlparser_redshift.rs diff --git a/examples/cli.rs b/examples/cli.rs index 38b3de841..1c177faaf 100644 --- a/examples/cli.rs +++ b/examples/cli.rs @@ -43,6 +43,7 @@ $ cargo run --feature json_example --example cli FILENAME.sql [--dialectname] "--mysql" => Box::new(MySqlDialect {}), "--snowflake" => Box::new(SnowflakeDialect {}), "--hive" => Box::new(HiveDialect {}), + "--redshift" => Box::new(RedshiftSqlDialect {}), "--generic" | "" => Box::new(GenericDialect {}), s => panic!("Unexpected parameter: {}", s), }; diff --git a/src/ast/ddl.rs b/src/ast/ddl.rs index 4d8b3b5bf..1847f2518 100644 --- a/src/ast/ddl.rs +++ b/src/ast/ddl.rs @@ -14,7 +14,7 @@ //! 
(commonly referred to as Data Definition Language, or DDL) #[cfg(not(feature = "std"))] -use alloc::{boxed::Box, string::String, string::ToString, vec::Vec}; +use alloc::{boxed::Box, string::String, vec::Vec}; use core::fmt; #[cfg(feature = "serde")] diff --git a/src/dialect/mod.rs b/src/dialect/mod.rs index 008b099d2..8defd66f3 100644 --- a/src/dialect/mod.rs +++ b/src/dialect/mod.rs @@ -17,11 +17,14 @@ mod hive; mod mssql; mod mysql; mod postgresql; +mod redshift; mod snowflake; mod sqlite; use core::any::{Any, TypeId}; use core::fmt::Debug; +use core::iter::Peekable; +use core::str::Chars; pub use self::ansi::AnsiDialect; pub use self::clickhouse::ClickHouseDialect; @@ -30,6 +33,7 @@ pub use self::hive::HiveDialect; pub use self::mssql::MsSqlDialect; pub use self::mysql::MySqlDialect; pub use self::postgresql::PostgreSqlDialect; +pub use self::redshift::RedshiftSqlDialect; pub use self::snowflake::SnowflakeDialect; pub use self::sqlite::SQLiteDialect; pub use crate::keywords; @@ -51,6 +55,10 @@ pub trait Dialect: Debug + Any { fn is_delimited_identifier_start(&self, ch: char) -> bool { ch == '"' } + /// Determine if quoted characters are proper for identifier + fn is_proper_identifier_inside_quotes(&self, mut _chars: Peekable>) -> bool { + true + } /// Determine if a character is a valid start character for an unquoted identifier fn is_identifier_start(&self, ch: char) -> bool; /// Determine if a character is a valid unquoted identifier character diff --git a/src/dialect/redshift.rs b/src/dialect/redshift.rs new file mode 100644 index 000000000..e2faed396 --- /dev/null +++ b/src/dialect/redshift.rs @@ -0,0 +1,53 @@ +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +use crate::dialect::Dialect; +use core::iter::Peekable; +use core::str::Chars; + +use super::PostgreSqlDialect; + +#[derive(Debug)] +pub struct RedshiftSqlDialect {} + +// In most cases the redshift dialect is identical to [`PostgresSqlDialect`]. +// +// Notable differences: +// 1. Redshift treats brackets `[` and `]` differently. For example, `SQL SELECT a[1][2] FROM b` +// in the Postgres dialect, the query will be parsed as an array, while in the Redshift dialect it will +// be a json path +impl Dialect for RedshiftSqlDialect { + fn is_delimited_identifier_start(&self, ch: char) -> bool { + ch == '"' || ch == '[' + } + + /// Determine if quoted characters are proper for identifier + /// It's needed to distinguish treating square brackets as quotes from + /// treating them as json path. If there is identifier then we assume + /// there is no json path. 
+ fn is_proper_identifier_inside_quotes(&self, mut chars: Peekable>) -> bool { + chars.next(); + let mut not_white_chars = chars.skip_while(|ch| ch.is_whitespace()).peekable(); + if let Some(&ch) = not_white_chars.peek() { + return self.is_identifier_start(ch); + } + false + } + + fn is_identifier_start(&self, ch: char) -> bool { + PostgreSqlDialect {}.is_identifier_start(ch) + } + + fn is_identifier_part(&self, ch: char) -> bool { + PostgreSqlDialect {}.is_identifier_part(ch) + } +} diff --git a/src/test_utils.rs b/src/test_utils.rs index 27eba1408..1a432e47a 100644 --- a/src/test_utils.rs +++ b/src/test_utils.rs @@ -141,6 +141,7 @@ pub fn all_dialects() -> TestedDialects { Box::new(AnsiDialect {}), Box::new(SnowflakeDialect {}), Box::new(HiveDialect {}), + Box::new(RedshiftSqlDialect {}), ], } } diff --git a/src/tokenizer.rs b/src/tokenizer.rs index 566deacec..9de656e34 100644 --- a/src/tokenizer.rs +++ b/src/tokenizer.rs @@ -431,7 +431,12 @@ impl<'a> Tokenizer<'a> { Ok(Some(Token::SingleQuotedString(s))) } // delimited (quoted) identifier - quote_start if self.dialect.is_delimited_identifier_start(quote_start) => { + quote_start + if self.dialect.is_delimited_identifier_start(ch) + && self + .dialect + .is_proper_identifier_inside_quotes(chars.clone()) => + { chars.next(); // consume the opening quote let quote_end = Word::matching_end_quote(quote_start); let (s, last_char) = parse_quoted_ident(chars, quote_end); diff --git a/tests/sqlparser_postgres.rs b/tests/sqlparser_postgres.rs index 125ce9f6e..e5dc20957 100644 --- a/tests/sqlparser_postgres.rs +++ b/tests/sqlparser_postgres.rs @@ -590,7 +590,7 @@ fn test_copy_to() { #[test] fn parse_copy_from() { - let sql = "COPY table (a, b) FROM 'file.csv' WITH + let sql = "COPY table (a, b) FROM 'file.csv' WITH ( FORMAT CSV, FREEZE, diff --git a/tests/sqlparser_redshift.rs b/tests/sqlparser_redshift.rs new file mode 100644 index 000000000..0c216a497 --- /dev/null +++ b/tests/sqlparser_redshift.rs @@ -0,0 +1,102 @@ +// 
Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#[macro_use] +mod test_utils; + +use test_utils::*; + +use sqlparser::ast::*; +use sqlparser::dialect::RedshiftSqlDialect; + +#[test] +fn test_square_brackets_over_db_schema_table_name() { + let select = redshift().verified_only_select("SELECT [col1] FROM [test_schema].[test_table]"); + assert_eq!( + select.projection[0], + SelectItem::UnnamedExpr(Expr::Identifier(Ident { + value: "col1".to_string(), + quote_style: Some('[') + })), + ); + assert_eq!( + select.from[0], + TableWithJoins { + relation: TableFactor::Table { + name: ObjectName(vec![ + Ident { + value: "test_schema".to_string(), + quote_style: Some('[') + }, + Ident { + value: "test_table".to_string(), + quote_style: Some('[') + } + ]), + alias: None, + args: vec![], + with_hints: vec![], + }, + joins: vec![], + } + ); +} + +#[test] +fn brackets_over_db_schema_table_name_with_whites_paces() { + match redshift().parse_sql_statements("SELECT [ col1 ] FROM [ test_schema].[ test_table]") { + Ok(statements) => { + assert_eq!(statements.len(), 1); + } + _ => unreachable!(), + } +} + +#[test] +fn test_double_quotes_over_db_schema_table_name() { + let select = + redshift().verified_only_select("SELECT \"col1\" FROM \"test_schema\".\"test_table\""); + assert_eq!( + select.projection[0], + SelectItem::UnnamedExpr(Expr::Identifier(Ident { + value: "col1".to_string(), + quote_style: Some('"') + })), + ); + assert_eq!( + select.from[0], + TableWithJoins { + relation: 
TableFactor::Table { + name: ObjectName(vec![ + Ident { + value: "test_schema".to_string(), + quote_style: Some('"') + }, + Ident { + value: "test_table".to_string(), + quote_style: Some('"') + } + ]), + alias: None, + args: vec![], + with_hints: vec![], + }, + joins: vec![], + } + ); +} + +fn redshift() -> TestedDialects { + TestedDialects { + dialects: vec![Box::new(RedshiftSqlDialect {})], + } +} From e68bdae5f259967dc54baa8fb2e95100a26c6f81 Mon Sep 17 00:00:00 2001 From: Dmitry Patsura Date: Thu, 5 May 2022 03:20:45 +0300 Subject: [PATCH 06/33] feat: Allow ArrayIndex for GenericDialect (#480) * feat: Allow ArrayIndex for GenericDialect * test array index with generic dialect too --- src/parser.rs | 2 +- tests/sqlparser_postgres.rs | 8 ++++---- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/src/parser.rs b/src/parser.rs index 203b7e3f8..5df75d785 100644 --- a/src/parser.rs +++ b/src/parser.rs @@ -1173,7 +1173,7 @@ impl<'a> Parser<'a> { expr: Box::new(expr), }) } else if Token::LBracket == tok { - if dialect_of!(self is PostgreSqlDialect) { + if dialect_of!(self is PostgreSqlDialect | GenericDialect) { // parse index return self.parse_array_index(expr); } diff --git a/tests/sqlparser_postgres.rs b/tests/sqlparser_postgres.rs index e5dc20957..0106e255d 100644 --- a/tests/sqlparser_postgres.rs +++ b/tests/sqlparser_postgres.rs @@ -1137,7 +1137,7 @@ fn parse_array_index_expr() { .collect(); let sql = "SELECT foo[0] FROM foos"; - let select = pg().verified_only_select(sql); + let select = pg_and_generic().verified_only_select(sql); assert_eq!( &Expr::ArrayIndex { obj: Box::new(Expr::Identifier(Ident::new("foo"))), @@ -1147,7 +1147,7 @@ fn parse_array_index_expr() { ); let sql = "SELECT foo[0][0] FROM foos"; - let select = pg().verified_only_select(sql); + let select = pg_and_generic().verified_only_select(sql); assert_eq!( &Expr::ArrayIndex { obj: Box::new(Expr::Identifier(Ident::new("foo"))), @@ -1157,7 +1157,7 @@ fn parse_array_index_expr() { ); let 
sql = r#"SELECT bar[0]["baz"]["fooz"] FROM foos"#; - let select = pg().verified_only_select(sql); + let select = pg_and_generic().verified_only_select(sql); assert_eq!( &Expr::ArrayIndex { obj: Box::new(Expr::Identifier(Ident::new("bar"))), @@ -1177,7 +1177,7 @@ fn parse_array_index_expr() { ); let sql = "SELECT (CAST(ARRAY[ARRAY[2, 3]] AS INT[][]))[1][2]"; - let select = pg().verified_only_select(sql); + let select = pg_and_generic().verified_only_select(sql); assert_eq!( &Expr::ArrayIndex { obj: Box::new(Expr::Nested(Box::new(Expr::Cast { From 8ef5fc8624bd8502cd86a69ce562c1587d496544 Mon Sep 17 00:00:00 2001 From: Dmitry Patsura Date: Fri, 6 May 2022 20:32:04 +0300 Subject: [PATCH 07/33] feat: Support ANY/ALL operators (#477) * feat: Support ANY/ALL operators * fix lint --- src/ast/mod.rs | 6 ++++++ src/parser.rs | 29 ++++++++++++++++++++++++----- tests/sqlparser_common.rs | 26 ++++++++++++++++++++++++++ 3 files changed, 56 insertions(+), 5 deletions(-) diff --git a/src/ast/mod.rs b/src/ast/mod.rs index a052cd976..f2f7f1498 100644 --- a/src/ast/mod.rs +++ b/src/ast/mod.rs @@ -270,6 +270,10 @@ pub enum Expr { op: BinaryOperator, right: Box, }, + /// Any operation e.g. `1 ANY (1)` or `foo > ANY(bar)`, It will be wrapped in the right side of BinaryExpr + AnyOp(Box), + /// ALL operation e.g. `1 ALL (1)` or `foo > ALL(bar)`, It will be wrapped in the right side of BinaryExpr + AllOp(Box), /// Unary operation e.g. 
`NOT foo` UnaryOp { op: UnaryOperator, @@ -433,6 +437,8 @@ impl fmt::Display for Expr { high ), Expr::BinaryOp { left, op, right } => write!(f, "{} {} {}", left, op, right), + Expr::AnyOp(expr) => write!(f, "ANY({})", expr), + Expr::AllOp(expr) => write!(f, "ALL({})", expr), Expr::UnaryOp { op, expr } => { if op == &UnaryOperator::PGPostfixFactorial { write!(f, "{}{}", expr, op) diff --git a/src/parser.rs b/src/parser.rs index 5df75d785..1caf2f2c6 100644 --- a/src/parser.rs +++ b/src/parser.rs @@ -1060,6 +1060,7 @@ impl<'a> Parser<'a> { /// Parse an operator following an expression pub fn parse_infix(&mut self, expr: Expr, precedence: u8) -> Result { let tok = self.next_token(); + let regular_binary_operator = match &tok { Token::Spaceship => Some(BinaryOperator::Spaceship), Token::DoubleEq => Some(BinaryOperator::Eq), @@ -1112,11 +1113,29 @@ impl<'a> Parser<'a> { }; if let Some(op) = regular_binary_operator { - Ok(Expr::BinaryOp { - left: Box::new(expr), - op, - right: Box::new(self.parse_subexpr(precedence)?), - }) + if let Some(keyword) = self.parse_one_of_keywords(&[Keyword::ANY, Keyword::ALL]) { + self.expect_token(&Token::LParen)?; + let right = self.parse_subexpr(precedence)?; + self.expect_token(&Token::RParen)?; + + let right = match keyword { + Keyword::ALL => Box::new(Expr::AllOp(Box::new(right))), + Keyword::ANY => Box::new(Expr::AnyOp(Box::new(right))), + _ => unreachable!(), + }; + + Ok(Expr::BinaryOp { + left: Box::new(expr), + op, + right, + }) + } else { + Ok(Expr::BinaryOp { + left: Box::new(expr), + op, + right: Box::new(self.parse_subexpr(precedence)?), + }) + } } else if let Token::Word(w) = &tok { match w.keyword { Keyword::IS => { diff --git a/tests/sqlparser_common.rs b/tests/sqlparser_common.rs index 43db2f769..aa7b4d9b5 100644 --- a/tests/sqlparser_common.rs +++ b/tests/sqlparser_common.rs @@ -998,6 +998,32 @@ fn parse_bitwise_ops() { } } +#[test] +fn parse_binary_any() { + let select = verified_only_select("SELECT a = ANY(b)"); + 
assert_eq!( + SelectItem::UnnamedExpr(Expr::BinaryOp { + left: Box::new(Expr::Identifier(Ident::new("a"))), + op: BinaryOperator::Eq, + right: Box::new(Expr::AnyOp(Box::new(Expr::Identifier(Ident::new("b"))))), + }), + select.projection[0] + ); +} + +#[test] +fn parse_binary_all() { + let select = verified_only_select("SELECT a = ALL(b)"); + assert_eq!( + SelectItem::UnnamedExpr(Expr::BinaryOp { + left: Box::new(Expr::Identifier(Ident::new("a"))), + op: BinaryOperator::Eq, + right: Box::new(Expr::AllOp(Box::new(Expr::Identifier(Ident::new("b"))))), + }), + select.projection[0] + ); +} + #[test] fn parse_logical_xor() { let sql = "SELECT true XOR true, false XOR false, true XOR false, false XOR true"; From 835bb2f9add9bc2394a5ff8279891509e188ada6 Mon Sep 17 00:00:00 2001 From: Maciej Obuchowski Date: Mon, 9 May 2022 19:08:24 +0200 Subject: [PATCH 08/33] into keyword is optional (#473) Signed-off-by: Maciej Obuchowski --- src/ast/mod.rs | 17 +++++++++--- src/parser.rs | 11 +++++--- tests/sqlparser_common.rs | 55 +++++++++++++++++++++++++-------------- 3 files changed, 58 insertions(+), 25 deletions(-) diff --git a/src/ast/mod.rs b/src/ast/mod.rs index f2f7f1498..7ba32153a 100644 --- a/src/ast/mod.rs +++ b/src/ast/mod.rs @@ -770,6 +770,8 @@ pub enum Statement { Insert { /// Only for Sqlite or: Option, + /// INTO - optional keyword + into: bool, /// TABLE table_name: ObjectName, /// COLUMNS @@ -1043,6 +1045,8 @@ pub enum Statement { Savepoint { name: Ident }, // MERGE INTO statement, based on Snowflake. 
See Merge { + // optional INTO keyword + into: bool, // Specifies the table to merge table: TableFactor, // Specifies the table or subquery to join with the target table @@ -1188,6 +1192,7 @@ impl fmt::Display for Statement { } Statement::Insert { or, + into, table_name, overwrite, partitioned, @@ -1202,9 +1207,10 @@ impl fmt::Display for Statement { } else { write!( f, - "INSERT {act}{tbl} {table_name} ", + "INSERT{over}{int}{tbl} {table_name} ", table_name = table_name, - act = if *overwrite { "OVERWRITE" } else { "INTO" }, + over = if *overwrite { " OVERWRITE" } else { "" }, + int = if *into { " INTO" } else { "" }, tbl = if *table { " TABLE" } else { "" } )?; } @@ -1755,13 +1761,18 @@ impl fmt::Display for Statement { write!(f, "{}", name) } Statement::Merge { + into, table, source, alias, on, clauses, } => { - write!(f, "MERGE INTO {} USING {} ", table, source)?; + write!( + f, + "MERGE{int} {table} USING {source} ", + int = if *into { " INTO" } else { "" } + )?; if let Some(a) = alias { write!(f, "as {} ", a)?; }; diff --git a/src/parser.rs b/src/parser.rs index 1caf2f2c6..e26c7a856 100644 --- a/src/parser.rs +++ b/src/parser.rs @@ -3802,8 +3802,11 @@ impl<'a> Parser<'a> { } else { None }; - let action = self.expect_one_of_keywords(&[Keyword::INTO, Keyword::OVERWRITE])?; - let overwrite = action == Keyword::OVERWRITE; + + let action = self.parse_one_of_keywords(&[Keyword::INTO, Keyword::OVERWRITE]); + let into = action == Some(Keyword::INTO); + let overwrite = action == Some(Keyword::OVERWRITE); + let local = self.parse_keyword(Keyword::LOCAL); if self.parse_keyword(Keyword::DIRECTORY) { @@ -3854,6 +3857,7 @@ impl<'a> Parser<'a> { Ok(Statement::Insert { or, table_name, + into, overwrite, partitioned, columns, @@ -4266,7 +4270,7 @@ impl<'a> Parser<'a> { } pub fn parse_merge(&mut self) -> Result { - self.expect_keyword(Keyword::INTO)?; + let into = self.parse_keyword(Keyword::INTO); let table = self.parse_table_factor()?; @@ -4278,6 +4282,7 @@ impl<'a> 
Parser<'a> { let clauses = self.parse_merge_clauses()?; Ok(Statement::Merge { + into, table, source: Box::new(source), alias, diff --git a/tests/sqlparser_common.rs b/tests/sqlparser_common.rs index aa7b4d9b5..fcc30b31b 100644 --- a/tests/sqlparser_common.rs +++ b/tests/sqlparser_common.rs @@ -41,6 +41,9 @@ fn parse_insert_values() { let rows1 = vec![row.clone()]; let rows2 = vec![row.clone(), row]; + let sql = "INSERT customer VALUES (1, 2, 3)"; + check_one(sql, "customer", &[], &rows1); + let sql = "INSERT INTO customer VALUES (1, 2, 3)"; check_one(sql, "customer", &[], &rows1); @@ -91,16 +94,6 @@ fn parse_insert_values() { verified_stmt("INSERT INTO customer WITH foo AS (SELECT 1) SELECT * FROM foo UNION VALUES (1)"); } -#[test] -fn parse_insert_invalid() { - let sql = "INSERT public.customer (id, name, active) VALUES (1, 2, 3)"; - let res = parse_sql_statements(sql); - assert_eq!( - ParserError::ParserError("Expected one of INTO or OVERWRITE, found: public".to_string()), - res.unwrap_err() - ); -} - #[test] fn parse_insert_sqlite() { let dialect = SQLiteDialect {}; @@ -4357,14 +4350,29 @@ fn test_revoke() { #[test] fn parse_merge() { let sql = "MERGE INTO s.bar AS dest USING (SELECT * FROM s.foo) as stg ON dest.D = stg.D AND dest.E = stg.E WHEN NOT MATCHED THEN INSERT (A, B, C) VALUES (stg.A, stg.B, stg.C) WHEN MATCHED AND dest.A = 'a' THEN UPDATE SET dest.F = stg.F, dest.G = stg.G WHEN MATCHED THEN DELETE"; - match verified_stmt(sql) { - Statement::Merge { - table, - source, - alias, - on, - clauses, - } => { + let sql_no_into = "MERGE s.bar AS dest USING (SELECT * FROM s.foo) as stg ON dest.D = stg.D AND dest.E = stg.E WHEN NOT MATCHED THEN INSERT (A, B, C) VALUES (stg.A, stg.B, stg.C) WHEN MATCHED AND dest.A = 'a' THEN UPDATE SET dest.F = stg.F, dest.G = stg.G WHEN MATCHED THEN DELETE"; + match (verified_stmt(sql), verified_stmt(sql_no_into)) { + ( + Statement::Merge { + into, + table, + source, + alias, + on, + clauses, + }, + Statement::Merge { + into: 
no_into, + table: table_no_into, + source: source_no_into, + alias: alias_no_into, + on: on_no_into, + clauses: clauses_no_into, + }, + ) => { + assert!(into); + assert!(!no_into); + assert_eq!( table, TableFactor::Table { @@ -4377,6 +4385,8 @@ fn parse_merge() { with_hints: vec![] } ); + assert_eq!(table, table_no_into); + assert_eq!( source, Box::new(SetExpr::Query(Box::new(Query { @@ -4411,6 +4421,8 @@ fn parse_merge() { lock: None }))) ); + assert_eq!(source, source_no_into); + assert_eq!( alias, Some(TableAlias { @@ -4418,6 +4430,8 @@ fn parse_merge() { columns: vec![] }) ); + assert_eq!(alias, alias_no_into); + assert_eq!( on, Box::new(Expr::BinaryOp { @@ -4446,6 +4460,8 @@ fn parse_merge() { }) }) ); + assert_eq!(on, on_no_into); + assert_eq!( clauses, vec![ @@ -4488,7 +4504,8 @@ fn parse_merge() { }, MergeClause::MatchedDelete(None) ] - ) + ); + assert_eq!(clauses, clauses_no_into); } _ => unreachable!(), } From 6d057ef4dfcd80e2311923d448890886bf5b75e1 Mon Sep 17 00:00:00 2001 From: Maciej Obuchowski Date: Mon, 9 May 2022 20:21:20 +0200 Subject: [PATCH 09/33] set: allow dots in variables by moving to ObjectName (#484) Signed-off-by: Maciej Obuchowski --- src/ast/mod.rs | 2 +- src/parser.rs | 6 +++--- tests/sqlparser_postgres.rs | 42 ++++++++++++++++++++++++++++++++----- 3 files changed, 41 insertions(+), 9 deletions(-) diff --git a/src/ast/mod.rs b/src/ast/mod.rs index 7ba32153a..e2435b516 100644 --- a/src/ast/mod.rs +++ b/src/ast/mod.rs @@ -921,7 +921,7 @@ pub enum Statement { SetVariable { local: bool, hivevar: bool, - variable: Ident, + variable: ObjectName, value: Vec, }, /// SHOW diff --git a/src/parser.rs b/src/parser.rs index e26c7a856..c67ce9a5b 100644 --- a/src/parser.rs +++ b/src/parser.rs @@ -3266,7 +3266,7 @@ impl<'a> Parser<'a> { }); } - let variable = self.parse_identifier()?; + let variable = self.parse_object_name()?; if self.consume_token(&Token::Eq) || self.parse_keyword(Keyword::TO) { let mut values = vec![]; loop { @@ -3287,14 +3287,14 
@@ impl<'a> Parser<'a> { value: values, }); } - } else if variable.value == "CHARACTERISTICS" { + } else if variable.to_string() == "CHARACTERISTICS" { self.expect_keywords(&[Keyword::AS, Keyword::TRANSACTION])?; Ok(Statement::SetTransaction { modes: self.parse_transaction_modes()?, snapshot: None, session: true, }) - } else if variable.value == "TRANSACTION" && modifier.is_none() { + } else if variable.to_string() == "TRANSACTION" && modifier.is_none() { if self.parse_keyword(Keyword::SNAPSHOT) { let snaphot_id = self.parse_value()?; return Ok(Statement::SetTransaction { diff --git a/tests/sqlparser_postgres.rs b/tests/sqlparser_postgres.rs index 0106e255d..8716f5aa3 100644 --- a/tests/sqlparser_postgres.rs +++ b/tests/sqlparser_postgres.rs @@ -18,6 +18,7 @@ mod test_utils; use test_utils::*; +use sqlparser::ast::Value::Boolean; use sqlparser::ast::*; use sqlparser::dialect::{GenericDialect, PostgreSqlDialect}; use sqlparser::parser::ParserError; @@ -780,7 +781,7 @@ fn parse_set() { Statement::SetVariable { local: false, hivevar: false, - variable: "a".into(), + variable: ObjectName(vec![Ident::new("a")]), value: vec![SetVariableValue::Ident("b".into())], } ); @@ -791,7 +792,7 @@ fn parse_set() { Statement::SetVariable { local: false, hivevar: false, - variable: "a".into(), + variable: ObjectName(vec![Ident::new("a")]), value: vec![SetVariableValue::Literal(Value::SingleQuotedString( "b".into() ))], @@ -804,7 +805,7 @@ fn parse_set() { Statement::SetVariable { local: false, hivevar: false, - variable: "a".into(), + variable: ObjectName(vec![Ident::new("a")]), value: vec![SetVariableValue::Literal(number("0"))], } ); @@ -815,7 +816,7 @@ fn parse_set() { Statement::SetVariable { local: false, hivevar: false, - variable: "a".into(), + variable: ObjectName(vec![Ident::new("a")]), value: vec![SetVariableValue::Ident("DEFAULT".into())], } ); @@ -826,11 +827,42 @@ fn parse_set() { Statement::SetVariable { local: true, hivevar: false, - variable: "a".into(), + variable: 
ObjectName(vec![Ident::new("a")]), value: vec![SetVariableValue::Ident("b".into())], } ); + let stmt = pg_and_generic().verified_stmt("SET a.b.c = b"); + assert_eq!( + stmt, + Statement::SetVariable { + local: false, + hivevar: false, + variable: ObjectName(vec![Ident::new("a"), Ident::new("b"), Ident::new("c")]), + value: vec![SetVariableValue::Ident("b".into())], + } + ); + + let stmt = pg_and_generic().one_statement_parses_to( + "SET hive.tez.auto.reducer.parallelism=false", + "SET hive.tez.auto.reducer.parallelism = false", + ); + assert_eq!( + stmt, + Statement::SetVariable { + local: false, + hivevar: false, + variable: ObjectName(vec![ + Ident::new("hive"), + Ident::new("tez"), + Ident::new("auto"), + Ident::new("reducer"), + Ident::new("parallelism") + ]), + value: vec![SetVariableValue::Literal(Boolean(false))], + } + ); + pg_and_generic().one_statement_parses_to("SET a TO b", "SET a = b"); pg_and_generic().one_statement_parses_to("SET SESSION a = b", "SET a = b"); From 6b2fc8102f39ae92273b9ef9ace1285f6145b546 Mon Sep 17 00:00:00 2001 From: Alex Qyoun-ae <4062971+MazterQyou@users.noreply.github.com> Date: Mon, 9 May 2022 22:29:43 +0400 Subject: [PATCH 10/33] feat: Support `TABLE` keyword with `SELECT INTO` (#487) --- src/ast/query.rs | 4 +++- src/parser.rs | 2 ++ tests/sqlparser_common.rs | 8 ++++++-- 3 files changed, 11 insertions(+), 3 deletions(-) diff --git a/src/ast/query.rs b/src/ast/query.rs index 7526c23cc..472b9e6ba 100644 --- a/src/ast/query.rs +++ b/src/ast/query.rs @@ -654,6 +654,7 @@ impl fmt::Display for Values { pub struct SelectInto { pub temporary: bool, pub unlogged: bool, + pub table: bool, pub name: ObjectName, } @@ -661,7 +662,8 @@ impl fmt::Display for SelectInto { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { let temporary = if self.temporary { " TEMPORARY" } else { "" }; let unlogged = if self.unlogged { " UNLOGGED" } else { "" }; + let table = if self.table { " TABLE" } else { "" }; - write!(f, "INTO{}{} {}", temporary, 
unlogged, self.name) + write!(f, "INTO{}{}{} {}", temporary, unlogged, table, self.name) } } diff --git a/src/parser.rs b/src/parser.rs index c67ce9a5b..e7f2e24b2 100644 --- a/src/parser.rs +++ b/src/parser.rs @@ -3138,10 +3138,12 @@ impl<'a> Parser<'a> { .parse_one_of_keywords(&[Keyword::TEMP, Keyword::TEMPORARY]) .is_some(); let unlogged = self.parse_keyword(Keyword::UNLOGGED); + let table = self.parse_keyword(Keyword::TABLE); let name = self.parse_object_name()?; Some(SelectInto { temporary, unlogged, + table, name, }) } else { diff --git a/tests/sqlparser_common.rs b/tests/sqlparser_common.rs index fcc30b31b..b70f592f6 100644 --- a/tests/sqlparser_common.rs +++ b/tests/sqlparser_common.rs @@ -383,13 +383,17 @@ fn parse_select_into() { &SelectInto { temporary: false, unlogged: false, + table: false, name: ObjectName(vec![Ident::new("table0")]) }, only(&select.into) ); - let sql = "SELECT * INTO TEMPORARY UNLOGGED table0 FROM table1"; - one_statement_parses_to(sql, "SELECT * INTO TEMPORARY UNLOGGED table0 FROM table1"); + let sql = "SELECT * INTO TEMPORARY UNLOGGED TABLE table0 FROM table1"; + one_statement_parses_to( + sql, + "SELECT * INTO TEMPORARY UNLOGGED TABLE table0 FROM table1", + ); // Do not allow aliases here let sql = "SELECT * INTO table0 asdf FROM table1"; From ed86c6d53dc6985426fc3af56585e9db63c77e0b Mon Sep 17 00:00:00 2001 From: Poonai Date: Tue, 10 May 2022 00:42:22 +0530 Subject: [PATCH 11/33] add support for postgres composite types (#466) * add support for postgres composite types Signed-off-by: password * fix composite test for bigdecimal feature Signed-off-by: password --- src/ast/mod.rs | 8 +++++ src/parser.rs | 13 +++++++- tests/sqlparser_postgres.rs | 61 +++++++++++++++++++++++++++++++++++++ 3 files changed, 81 insertions(+), 1 deletion(-) diff --git a/src/ast/mod.rs b/src/ast/mod.rs index e2435b516..4a3e346f6 100644 --- a/src/ast/mod.rs +++ b/src/ast/mod.rs @@ -231,6 +231,11 @@ pub enum Expr { operator: JsonOperator, right: Box, }, + 
/// CompositeAccess (postgres) eg: SELECT (information_schema._pg_expandarray(array['i','i'])).n + CompositeAccess { + expr: Box, + key: Ident, + }, /// `IS NULL` operator IsNull(Box), /// `IS NOT NULL` operator @@ -565,6 +570,9 @@ impl fmt::Display for Expr { } => { write!(f, "{} {} {}", left, operator, right) } + Expr::CompositeAccess { expr, key } => { + write!(f, "{}.{}", expr, key) + } } } } diff --git a/src/parser.rs b/src/parser.rs index e7f2e24b2..c4b13e91d 100644 --- a/src/parser.rs +++ b/src/parser.rs @@ -518,7 +518,18 @@ impl<'a> Parser<'a> { } }; self.expect_token(&Token::RParen)?; - Ok(expr) + if !self.consume_token(&Token::Period) { + return Ok(expr); + } + let tok = self.next_token(); + let key = match tok { + Token::Word(word) => word.to_ident(), + _ => return parser_err!(format!("Expected identifier, found: {}", tok)), + }; + Ok(Expr::CompositeAccess { + expr: Box::new(expr), + key, + }) } Token::Placeholder(_) => { self.prev_token(); diff --git a/tests/sqlparser_postgres.rs b/tests/sqlparser_postgres.rs index 8716f5aa3..69b7fcfa4 100644 --- a/tests/sqlparser_postgres.rs +++ b/tests/sqlparser_postgres.rs @@ -1334,6 +1334,67 @@ fn test_json() { ); } +#[test] +fn test_composite_value() { + let sql = "SELECT (on_hand.item).name FROM on_hand WHERE (on_hand.item).price > 9"; + let select = pg().verified_only_select(sql); + assert_eq!( + SelectItem::UnnamedExpr(Expr::CompositeAccess { + key: Ident::new("name"), + expr: Box::new(Expr::Nested(Box::new(Expr::CompoundIdentifier(vec![ + Ident::new("on_hand"), + Ident::new("item") + ])))) + }), + select.projection[0] + ); + + #[cfg(feature = "bigdecimal")] + let num: Expr = Expr::Value(Value::Number(bigdecimal::BigDecimal::from(9), false)); + #[cfg(not(feature = "bigdecimal"))] + let num: Expr = Expr::Value(Value::Number("9".to_string(), false)); + assert_eq!( + select.selection, + Some(Expr::BinaryOp { + left: Box::new(Expr::CompositeAccess { + key: Ident::new("price"), + expr: 
Box::new(Expr::Nested(Box::new(Expr::CompoundIdentifier(vec![ + Ident::new("on_hand"), + Ident::new("item") + ])))) + }), + op: BinaryOperator::Gt, + right: Box::new(num) + }) + ); + + let sql = "SELECT (information_schema._pg_expandarray(ARRAY['i', 'i'])).n"; + let select = pg().verified_only_select(sql); + assert_eq!( + SelectItem::UnnamedExpr(Expr::CompositeAccess { + key: Ident::new("n"), + expr: Box::new(Expr::Nested(Box::new(Expr::Function(Function { + name: ObjectName(vec![ + Ident::new("information_schema"), + Ident::new("_pg_expandarray") + ]), + args: vec![FunctionArg::Unnamed(FunctionArgExpr::Expr(Expr::Array( + Array { + elem: vec![ + Expr::Value(Value::SingleQuotedString("i".to_string())), + Expr::Value(Value::SingleQuotedString("i".to_string())), + ], + named: true + } + )))], + over: None, + distinct: false, + })))) + }), + select.projection[0] + ); +} + #[test] fn parse_comments() { match pg().verified_stmt("COMMENT ON COLUMN tab.name IS 'comment'") { From 35f5f0be4df7e98aa42a0f2895c87e15225c5988 Mon Sep 17 00:00:00 2001 From: yuval-illumex <85674443+yuval-illumex@users.noreply.github.com> Date: Tue, 10 May 2022 13:34:01 +0300 Subject: [PATCH 12/33] Redshift - Add support in sharp as start of the field name (#485) * Add support in sharp * CR Review --- src/dialect/redshift.rs | 6 ++++-- tests/sqlparser_redshift.rs | 10 ++++++++++ 2 files changed, 14 insertions(+), 2 deletions(-) diff --git a/src/dialect/redshift.rs b/src/dialect/redshift.rs index e2faed396..c85f3dc20 100644 --- a/src/dialect/redshift.rs +++ b/src/dialect/redshift.rs @@ -44,10 +44,12 @@ impl Dialect for RedshiftSqlDialect { } fn is_identifier_start(&self, ch: char) -> bool { - PostgreSqlDialect {}.is_identifier_start(ch) + // Extends Postgres dialect with sharp + PostgreSqlDialect {}.is_identifier_start(ch) || ch == '#' } fn is_identifier_part(&self, ch: char) -> bool { - PostgreSqlDialect {}.is_identifier_part(ch) + // Extends Postgres dialect with sharp + PostgreSqlDialect 
{}.is_identifier_part(ch) || ch == '#' } } diff --git a/tests/sqlparser_redshift.rs b/tests/sqlparser_redshift.rs index 0c216a497..ce818a98d 100644 --- a/tests/sqlparser_redshift.rs +++ b/tests/sqlparser_redshift.rs @@ -100,3 +100,13 @@ fn redshift() -> TestedDialects { dialects: vec![Box::new(RedshiftSqlDialect {})], } } + +#[test] +fn test_sharp() { + let sql = "SELECT #_of_values"; + let select = redshift().verified_only_select(sql); + assert_eq!( + SelectItem::UnnamedExpr(Expr::Identifier(Ident::new("#_of_values"))), + select.projection[0] + ); +} From 9750841a66d37e3c59e68dd10b1e86c089e02f53 Mon Sep 17 00:00:00 2001 From: Maciej Skrzypkowski Date: Tue, 10 May 2022 12:34:45 +0200 Subject: [PATCH 13/33] Parse merge source as table factor (#483) * Parse source of MERGE as table_factor Some MERGE queries need a table as a soruce, added proper test showing it * Clippy fix Co-authored-by: Maciej Skrzypkowski --- src/ast/mod.rs | 8 +--- src/parser.rs | 6 +-- tests/sqlparser_common.rs | 99 ++++++++++++++++++++++----------------- 3 files changed, 58 insertions(+), 55 deletions(-) diff --git a/src/ast/mod.rs b/src/ast/mod.rs index 4a3e346f6..d9052916d 100644 --- a/src/ast/mod.rs +++ b/src/ast/mod.rs @@ -1058,9 +1058,7 @@ pub enum Statement { // Specifies the table to merge table: TableFactor, // Specifies the table or subquery to join with the target table - source: Box, - // Specifies alias to the table that is joined with target table - alias: Option, + source: TableFactor, // Specifies the expression on which to join the target table and source on: Box, // Specifies the actions to perform when values match or do not match. 
@@ -1772,7 +1770,6 @@ impl fmt::Display for Statement { into, table, source, - alias, on, clauses, } => { @@ -1781,9 +1778,6 @@ impl fmt::Display for Statement { "MERGE{int} {table} USING {source} ", int = if *into { " INTO" } else { "" } )?; - if let Some(a) = alias { - write!(f, "as {} ", a)?; - }; write!(f, "ON {} ", on)?; write!(f, "{}", display_separated(clauses, " ")) } diff --git a/src/parser.rs b/src/parser.rs index c4b13e91d..5ee3d5cb6 100644 --- a/src/parser.rs +++ b/src/parser.rs @@ -4288,8 +4288,7 @@ impl<'a> Parser<'a> { let table = self.parse_table_factor()?; self.expect_keyword(Keyword::USING)?; - let source = self.parse_query_body(0)?; - let alias = self.parse_optional_table_alias(keywords::RESERVED_FOR_TABLE_ALIAS)?; + let source = self.parse_table_factor()?; self.expect_keyword(Keyword::ON)?; let on = self.parse_expr()?; let clauses = self.parse_merge_clauses()?; @@ -4297,8 +4296,7 @@ impl<'a> Parser<'a> { Ok(Statement::Merge { into, table, - source: Box::new(source), - alias, + source, on: Box::new(on), clauses, }) diff --git a/tests/sqlparser_common.rs b/tests/sqlparser_common.rs index b70f592f6..bed6bbcde 100644 --- a/tests/sqlparser_common.rs +++ b/tests/sqlparser_common.rs @@ -4353,15 +4353,14 @@ fn test_revoke() { #[test] fn parse_merge() { - let sql = "MERGE INTO s.bar AS dest USING (SELECT * FROM s.foo) as stg ON dest.D = stg.D AND dest.E = stg.E WHEN NOT MATCHED THEN INSERT (A, B, C) VALUES (stg.A, stg.B, stg.C) WHEN MATCHED AND dest.A = 'a' THEN UPDATE SET dest.F = stg.F, dest.G = stg.G WHEN MATCHED THEN DELETE"; - let sql_no_into = "MERGE s.bar AS dest USING (SELECT * FROM s.foo) as stg ON dest.D = stg.D AND dest.E = stg.E WHEN NOT MATCHED THEN INSERT (A, B, C) VALUES (stg.A, stg.B, stg.C) WHEN MATCHED AND dest.A = 'a' THEN UPDATE SET dest.F = stg.F, dest.G = stg.G WHEN MATCHED THEN DELETE"; + let sql = "MERGE INTO s.bar AS dest USING (SELECT * FROM s.foo) AS stg ON dest.D = stg.D AND dest.E = stg.E WHEN NOT MATCHED THEN INSERT (A, B, 
C) VALUES (stg.A, stg.B, stg.C) WHEN MATCHED AND dest.A = 'a' THEN UPDATE SET dest.F = stg.F, dest.G = stg.G WHEN MATCHED THEN DELETE"; + let sql_no_into = "MERGE s.bar AS dest USING (SELECT * FROM s.foo) AS stg ON dest.D = stg.D AND dest.E = stg.E WHEN NOT MATCHED THEN INSERT (A, B, C) VALUES (stg.A, stg.B, stg.C) WHEN MATCHED AND dest.A = 'a' THEN UPDATE SET dest.F = stg.F, dest.G = stg.G WHEN MATCHED THEN DELETE"; match (verified_stmt(sql), verified_stmt(sql_no_into)) { ( Statement::Merge { into, table, source, - alias, on, clauses, }, @@ -4369,7 +4368,6 @@ fn parse_merge() { into: no_into, table: table_no_into, source: source_no_into, - alias: alias_no_into, on: on_no_into, clauses: clauses_no_into, }, @@ -4393,49 +4391,50 @@ fn parse_merge() { assert_eq!( source, - Box::new(SetExpr::Query(Box::new(Query { - with: None, - body: SetExpr::Select(Box::new(Select { - distinct: false, - top: None, - projection: vec![SelectItem::Wildcard], - into: None, - from: vec![TableWithJoins { - relation: TableFactor::Table { - name: ObjectName(vec![Ident::new("s"), Ident::new("foo")]), - alias: None, - args: vec![], - with_hints: vec![], - }, - joins: vec![] - }], - lateral_views: vec![], - selection: None, - group_by: vec![], - cluster_by: vec![], - distribute_by: vec![], - sort_by: vec![], - having: None, - qualify: None - })), - order_by: vec![], - limit: None, - offset: None, - fetch: None, - lock: None - }))) + TableFactor::Derived { + lateral: false, + subquery: Box::new(Query { + with: None, + body: SetExpr::Select(Box::new(Select { + distinct: false, + top: None, + projection: vec![SelectItem::Wildcard], + into: None, + from: vec![TableWithJoins { + relation: TableFactor::Table { + name: ObjectName(vec![Ident::new("s"), Ident::new("foo")]), + alias: None, + args: vec![], + with_hints: vec![] + }, + joins: vec![] + }], + lateral_views: vec![], + selection: None, + group_by: vec![], + cluster_by: vec![], + distribute_by: vec![], + sort_by: vec![], + having: None, + 
qualify: None, + })), + order_by: vec![], + limit: None, + offset: None, + fetch: None, + lock: None + }), + alias: Some(TableAlias { + name: Ident { + value: "stg".to_string(), + quote_style: None + }, + columns: vec![] + }) + } ); assert_eq!(source, source_no_into); - assert_eq!( - alias, - Some(TableAlias { - name: Ident::new("stg"), - columns: vec![] - }) - ); - assert_eq!(alias, alias_no_into); - assert_eq!( on, Box::new(Expr::BinaryOp { @@ -4515,6 +4514,18 @@ fn parse_merge() { } } +#[test] +fn test_merge_into_using_table() { + let sql = "MERGE INTO target_table USING source_table \ + ON target_table.id = source_table.oooid \ + WHEN MATCHED THEN \ + UPDATE SET target_table.description = source_table.description \ + WHEN NOT MATCHED THEN \ + INSERT (ID, description) VALUES (source_table.id, source_table.description)"; + + verified_stmt(sql); +} + #[test] fn test_lock() { let sql = "SELECT * FROM student WHERE id = '1' FOR UPDATE"; From acd60d169d2bc0c0778dc619a94f4c0a2943c30f Mon Sep 17 00:00:00 2001 From: Andrew Lamb Date: Tue, 10 May 2022 09:23:04 -0400 Subject: [PATCH 14/33] Update changelog for 0.17.0 (#488) * Update changelog for 0.17.0 * add reference to #466 * Updates for recent merges --- CHANGELOG.md | 34 ++++++++++++++++++++++++++++++++-- 1 file changed, 32 insertions(+), 2 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index aee2df636..bd258e6b4 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -9,6 +9,38 @@ Given that the parser produces a typed AST, any changes to the AST will technica Check https://github.com/sqlparser-rs/sqlparser-rs/commits/main for undocumented changes. 
+## [0.17.0] 2022-05-09 + +### Added + + + +* Support `#` as first character in field name for `RedShift` dialect (#485) - Thanks @yuval-illumex +* Support for postgres composite types (#466) - Thanks @poonai +* Support `TABLE` keyword with SELECT INTO (#487) - Thanks @MazterQyou +* Support `ANY`/`ALL` operators (#477) - Thanks @ovr +* Support `ArrayIndex` in `GenericDialect` (#480) - Thanks @ovr +* Support `Redshift` dialect, handle square brackets properly (#471) - Thanks @mskrzypkows +* Support `KILL` statement (#479) - Thanks @ovr +* Support `QUALIFY` clause on `SELECT` for `Snowflake` dialect (#465) - Thanks @mobuchowski +* Support `POSITION(x IN y)` function syntax (#463) @yuval-illumex +* Support `global`,`local`, `on commit` for `create temporary table` (#456) - Thanks @gandronchik +* Support `NVARCHAR` data type (#462) - Thanks @yuval-illumex +* Support for postgres json operators `->`, `->>`, `#>`, and `#>>` (#458) - Thanks @poonai +* Support `SET ROLE` statement (#455) - Thanks @slhmy + +### Changed: +* Improve docstrings for `KILL` statement (#481) - Thanks @alamb +* Add negative tests for `POSITION` (#469) - Thanks @alamb +* Add negative tests for `IN` parsing (#468) - Thanks @alamb +* Suppport table names (as well as subqueries) as source in `MERGE` statements (#483) - Thanks @mskrzypkows + + +### Fixed: +* `INTO` keyword is optional for `INSERT`, `MERGE` (#473) - Thanks @mobuchowski +* Support `IS TRUE` and `IS FALSE` expressions in boolean filter (#474) - Thanks @yuval-illumex +* Support fully qualified object names in `SET VARIABLE` (#484) - Thanks mobuchowski + ## [0.16.0] 2022-04-03 ### Added @@ -65,8 +97,6 @@ Check https://github.com/sqlparser-rs/sqlparser-rs/commits/main for undocumented * export all methods of parser (#397) - Thanks @neverchanje! 
* Clarify maintenance status on README (#416) - @alamb -@panarch - ### Fixed * Fix new clippy errors (#412) - @alamb * Fix panic with `GRANT/REVOKE` in `CONNECT`, `CREATE`, `EXECUTE` or `TEMPORARY` - Thanks @evgenyx00 From 484a7b6da4b1cc2a8a994f63d014677ba7bfad8e Mon Sep 17 00:00:00 2001 From: Andrew Lamb Date: Tue, 10 May 2022 09:26:00 -0400 Subject: [PATCH 15/33] (cargo-release) version 0.17.0 --- Cargo.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Cargo.toml b/Cargo.toml index 94fd9f394..00b4d1a1c 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,7 +1,7 @@ [package] name = "sqlparser" description = "Extensible SQL Lexer and Parser with support for ANSI SQL:2011" -version = "0.16.0" +version = "0.17.0" authors = ["Andy Grove "] homepage = "https://github.com/sqlparser-rs/sqlparser-rs" documentation = "https://docs.rs/sqlparser/" From 97a148aee43f99a74dd732cb1910e38eb1b29535 Mon Sep 17 00:00:00 2001 From: Yoshiyuki Komazaki Date: Tue, 10 May 2022 23:25:27 +0900 Subject: [PATCH 16/33] Add BigQuery dialect (#490) --- examples/cli.rs | 1 + src/dialect/bigquery.rs | 35 +++++++++++++++ src/dialect/mod.rs | 2 + tests/sqlparser_bigquery.rs | 86 +++++++++++++++++++++++++++++++++++++ tests/sqlparser_common.rs | 4 +- 5 files changed, 127 insertions(+), 1 deletion(-) create mode 100644 src/dialect/bigquery.rs create mode 100644 tests/sqlparser_bigquery.rs diff --git a/examples/cli.rs b/examples/cli.rs index 1c177faaf..fcaf1b144 100644 --- a/examples/cli.rs +++ b/examples/cli.rs @@ -38,6 +38,7 @@ $ cargo run --feature json_example --example cli FILENAME.sql [--dialectname] let dialect: Box = match std::env::args().nth(2).unwrap_or_default().as_ref() { "--ansi" => Box::new(AnsiDialect {}), + "--bigquery" => Box::new(BigQueryDialect {}), "--postgres" => Box::new(PostgreSqlDialect {}), "--ms" => Box::new(MsSqlDialect {}), "--mysql" => Box::new(MySqlDialect {}), diff --git a/src/dialect/bigquery.rs b/src/dialect/bigquery.rs new file mode 100644 index 
000000000..e42676b22 --- /dev/null +++ b/src/dialect/bigquery.rs @@ -0,0 +1,35 @@ +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +use crate::dialect::Dialect; + +#[derive(Debug, Default)] +pub struct BigQueryDialect; + +impl Dialect for BigQueryDialect { + // See https://cloud.google.com/bigquery/docs/reference/standard-sql/lexical#identifiers + fn is_delimited_identifier_start(&self, ch: char) -> bool { + ch == '`' + } + + fn is_identifier_start(&self, ch: char) -> bool { + ('a'..='z').contains(&ch) || ('A'..='Z').contains(&ch) || ch == '_' + } + + fn is_identifier_part(&self, ch: char) -> bool { + ('a'..='z').contains(&ch) + || ('A'..='Z').contains(&ch) + || ('0'..='9').contains(&ch) + || ch == '_' + || ch == '-' + } +} diff --git a/src/dialect/mod.rs b/src/dialect/mod.rs index 8defd66f3..63821dd74 100644 --- a/src/dialect/mod.rs +++ b/src/dialect/mod.rs @@ -11,6 +11,7 @@ // limitations under the License. 
mod ansi; +mod bigquery; mod clickhouse; mod generic; mod hive; @@ -27,6 +28,7 @@ use core::iter::Peekable; use core::str::Chars; pub use self::ansi::AnsiDialect; +pub use self::bigquery::BigQueryDialect; pub use self::clickhouse::ClickHouseDialect; pub use self::generic::GenericDialect; pub use self::hive::HiveDialect; diff --git a/tests/sqlparser_bigquery.rs b/tests/sqlparser_bigquery.rs new file mode 100644 index 000000000..380530199 --- /dev/null +++ b/tests/sqlparser_bigquery.rs @@ -0,0 +1,86 @@ +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#[macro_use] +mod test_utils; + +use test_utils::*; + +use sqlparser::ast::*; +use sqlparser::dialect::BigQueryDialect; + +#[test] +fn parse_table_identifiers() { + fn test_table_ident(ident: &str, expected: Vec) { + let sql = format!("SELECT 1 FROM {}", ident); + let select = bigquery().verified_only_select(&sql); + assert_eq!( + select.from, + vec![TableWithJoins { + relation: TableFactor::Table { + name: ObjectName(expected), + alias: None, + args: vec![], + with_hints: vec![], + }, + joins: vec![] + },] + ); + } + fn test_table_ident_err(ident: &str) { + let sql = format!("SELECT 1 FROM {}", ident); + assert!(bigquery().parse_sql_statements(&sql).is_err()); + } + + test_table_ident("da-sh-es", vec![Ident::new("da-sh-es")]); + + test_table_ident("`spa ce`", vec![Ident::with_quote('`', "spa ce")]); + + test_table_ident( + "`!@#$%^&*()-=_+`", + vec![Ident::with_quote('`', "!@#$%^&*()-=_+")], + ); + + test_table_ident( + "_5abc.dataField", + vec![Ident::new("_5abc"), Ident::new("dataField")], + ); + test_table_ident( + "`5abc`.dataField", + vec![Ident::with_quote('`', "5abc"), Ident::new("dataField")], + ); + + test_table_ident_err("5abc.dataField"); + + test_table_ident( + "abc5.dataField", + vec![Ident::new("abc5"), Ident::new("dataField")], + ); + + test_table_ident_err("abc5!.dataField"); + + test_table_ident( + "`GROUP`.dataField", + vec![Ident::with_quote('`', "GROUP"), Ident::new("dataField")], + ); + + // TODO: this should be error + // test_table_ident_err("GROUP.dataField"); + + test_table_ident("abc5.GROUP", vec![Ident::new("abc5"), Ident::new("GROUP")]); +} + +fn bigquery() -> TestedDialects { + TestedDialects { + dialects: vec![Box::new(BigQueryDialect {})], + } +} diff --git a/tests/sqlparser_common.rs b/tests/sqlparser_common.rs index bed6bbcde..0986e407e 100644 --- a/tests/sqlparser_common.rs +++ b/tests/sqlparser_common.rs @@ -23,7 +23,8 @@ mod test_utils; use matches::assert_matches; use sqlparser::ast::*; use sqlparser::dialect::{ - 
AnsiDialect, GenericDialect, MsSqlDialect, PostgreSqlDialect, SQLiteDialect, SnowflakeDialect, + AnsiDialect, BigQueryDialect, GenericDialect, MsSqlDialect, PostgreSqlDialect, SQLiteDialect, + SnowflakeDialect, }; use sqlparser::keywords::ALL_KEYWORDS; use sqlparser::parser::{Parser, ParserError}; @@ -4556,6 +4557,7 @@ fn test_placeholder() { Box::new(PostgreSqlDialect {}), Box::new(MsSqlDialect {}), Box::new(AnsiDialect {}), + Box::new(BigQueryDialect {}), Box::new(SnowflakeDialect {}), // Note: `$` is the starting word for the HiveDialect identifier // Box::new(sqlparser::dialect::HiveDialect {}), From dd805e9a6b6254cb9b76edf35e71b9c71f0895d5 Mon Sep 17 00:00:00 2001 From: Alex Yaroslavsky <70210301+alexsatori@users.noreply.github.com> Date: Sun, 15 May 2022 21:58:56 +0300 Subject: [PATCH 17/33] Support unicode whitespace (#482) * Support unicode whitespace * Add test --- src/tokenizer.rs | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/src/tokenizer.rs b/src/tokenizer.rs index 9de656e34..3fb0f66b2 100644 --- a/src/tokenizer.rs +++ b/src/tokenizer.rs @@ -653,6 +653,10 @@ impl<'a> Tokenizer<'a> { ); Ok(Some(Token::Placeholder(String::from("$") + &s))) } + //whitespace check (including unicode chars) should be last as it covers some of the chars above + ch if ch.is_whitespace() => { + self.consume_and_return(chars, Token::Whitespace(Whitespace::Space)) + } other => self.consume_and_return(chars, Token::Char(other)), }, None => Ok(None), @@ -1254,6 +1258,21 @@ mod tests { compare(expected, tokens); } + #[test] + fn tokenize_unicode_whitespace() { + let sql = String::from(" \u{2003}\n"); + + let dialect = GenericDialect {}; + let mut tokenizer = Tokenizer::new(&dialect, &sql); + let tokens = tokenizer.tokenize().unwrap(); + let expected = vec![ + Token::Whitespace(Whitespace::Space), + Token::Whitespace(Whitespace::Space), + Token::Whitespace(Whitespace::Newline), + ]; + compare(expected, tokens); + } + #[test] fn tokenize_mismatched_quotes() { 
let sql = String::from("\"foo"); From 11046f66e7f50fd2151b03c1890d85de4514569f Mon Sep 17 00:00:00 2001 From: Andrew Lamb Date: Sun, 22 May 2022 15:03:48 -0400 Subject: [PATCH 18/33] Correct typo: indexs to indexes (#492) --- src/ast/mod.rs | 6 +++--- src/parser.rs | 6 +++--- tests/sqlparser_postgres.rs | 8 ++++---- 3 files changed, 10 insertions(+), 10 deletions(-) diff --git a/src/ast/mod.rs b/src/ast/mod.rs index d9052916d..92bde05aa 100644 --- a/src/ast/mod.rs +++ b/src/ast/mod.rs @@ -371,7 +371,7 @@ pub enum Expr { /// An array index expression e.g. `(ARRAY[1, 2])[1]` or `(current_schemas(FALSE))[1]` ArrayIndex { obj: Box, - indexs: Vec, + indexes: Vec, }, /// An array expression e.g. `ARRAY[1, 2]` Array(Array), @@ -553,9 +553,9 @@ impl fmt::Display for Expr { Expr::Tuple(exprs) => { write!(f, "({})", display_comma_separated(exprs)) } - Expr::ArrayIndex { obj, indexs } => { + Expr::ArrayIndex { obj, indexes } => { write!(f, "{}", obj)?; - for i in indexs { + for i in indexes { write!(f, "[{}]", i)?; } Ok(()) diff --git a/src/parser.rs b/src/parser.rs index 5ee3d5cb6..beb7394ba 100644 --- a/src/parser.rs +++ b/src/parser.rs @@ -1234,15 +1234,15 @@ impl<'a> Parser<'a> { pub fn parse_array_index(&mut self, expr: Expr) -> Result { let index = self.parse_expr()?; self.expect_token(&Token::RBracket)?; - let mut indexs: Vec = vec![index]; + let mut indexes: Vec = vec![index]; while self.consume_token(&Token::LBracket) { let index = self.parse_expr()?; self.expect_token(&Token::RBracket)?; - indexs.push(index); + indexes.push(index); } Ok(Expr::ArrayIndex { obj: Box::new(expr), - indexs, + indexes, }) } diff --git a/tests/sqlparser_postgres.rs b/tests/sqlparser_postgres.rs index 69b7fcfa4..4fee8083a 100644 --- a/tests/sqlparser_postgres.rs +++ b/tests/sqlparser_postgres.rs @@ -1173,7 +1173,7 @@ fn parse_array_index_expr() { assert_eq!( &Expr::ArrayIndex { obj: Box::new(Expr::Identifier(Ident::new("foo"))), - indexs: vec![num[0].clone()], + indexes: 
vec![num[0].clone()], }, expr_from_projection(only(&select.projection)), ); @@ -1183,7 +1183,7 @@ fn parse_array_index_expr() { assert_eq!( &Expr::ArrayIndex { obj: Box::new(Expr::Identifier(Ident::new("foo"))), - indexs: vec![num[0].clone(), num[0].clone()], + indexes: vec![num[0].clone(), num[0].clone()], }, expr_from_projection(only(&select.projection)), ); @@ -1193,7 +1193,7 @@ fn parse_array_index_expr() { assert_eq!( &Expr::ArrayIndex { obj: Box::new(Expr::Identifier(Ident::new("bar"))), - indexs: vec![ + indexes: vec![ num[0].clone(), Expr::Identifier(Ident { value: "baz".to_string(), @@ -1224,7 +1224,7 @@ fn parse_array_index_expr() { None ))))) }))), - indexs: vec![num[1].clone(), num[2].clone()], + indexes: vec![num[1].clone(), num[2].clone()], }, expr_from_projection(only(&select.projection)), ); From 85e0e5fd397d20bfaa4e64ed49be98ac67f0ab9b Mon Sep 17 00:00:00 2001 From: Andrew Lamb Date: Sun, 22 May 2022 15:26:06 -0400 Subject: [PATCH 19/33] Add docs for `MapAccess` (#489) * Add docs for `MapAccess` * fix: fmt * Apply suggestions from code review Co-authored-by: Dmitry Patsura * touchup Co-authored-by: Dmitry Patsura --- src/ast/mod.rs | 34 ++++++++++------------------------ 1 file changed, 10 insertions(+), 24 deletions(-) diff --git a/src/ast/mod.rs b/src/ast/mod.rs index 92bde05aa..184add2ce 100644 --- a/src/ast/mod.rs +++ b/src/ast/mod.rs @@ -232,10 +232,7 @@ pub enum Expr { right: Box, }, /// CompositeAccess (postgres) eg: SELECT (information_schema._pg_expandarray(array['i','i'])).n - CompositeAccess { - expr: Box, - key: Ident, - }, + CompositeAccess { expr: Box, key: Ident }, /// `IS NULL` operator IsNull(Box), /// `IS NOT NULL` operator @@ -280,10 +277,7 @@ pub enum Expr { /// ALL operation e.g. `1 ALL (1)` or `foo > ALL(bar)`, It will be wrapped in the right side of BinaryExpr AllOp(Box), /// Unary operation e.g. 
`NOT foo` - UnaryOp { - op: UnaryOperator, - expr: Box, - }, + UnaryOp { op: UnaryOperator, expr: Box }, /// CAST an expression to a different data type e.g. `CAST(foo AS VARCHAR(123))` Cast { expr: Box, @@ -301,10 +295,7 @@ pub enum Expr { expr: Box, }, /// POSITION( in ) - Position { - expr: Box, - r#in: Box, - }, + Position { expr: Box, r#in: Box }, /// SUBSTRING( [FROM ] [FOR ]) Substring { expr: Box, @@ -331,14 +322,12 @@ pub enum Expr { /// A constant of form ` 'value'`. /// This can represent ANSI SQL `DATE`, `TIME`, and `TIMESTAMP` literals (such as `DATE '2020-01-01'`), /// as well as constants of other types (a non-standard PostgreSQL extension). - TypedString { - data_type: DataType, - value: String, - }, - MapAccess { - column: Box, - keys: Vec, - }, + TypedString { data_type: DataType, value: String }, + /// Access a map-like object by field (e.g. `column['field']` or `column[4]` + /// Note that depending on the dialect, struct like accesses may be + /// parsed as [`ArrayIndex`] or [`MapAccess`] + /// + MapAccess { column: Box, keys: Vec }, /// Scalar function call e.g. `LEFT(foo, 5)` Function(Function), /// `CASE [] WHEN THEN ... [ELSE ] END` @@ -369,10 +358,7 @@ pub enum Expr { /// ROW / TUPLE a single value, such as `SELECT (1, 2)` Tuple(Vec), /// An array index expression e.g. `(ARRAY[1, 2])[1]` or `(current_schemas(FALSE))[1]` - ArrayIndex { - obj: Box, - indexes: Vec, - }, + ArrayIndex { obj: Box, indexes: Vec }, /// An array expression e.g. 
`ARRAY[1, 2]` Array(Array), } From 74f92079ac6a16f85e5190c9c86be5c08c371d55 Mon Sep 17 00:00:00 2001 From: Maciej Obuchowski Date: Sun, 22 May 2022 21:33:07 +0200 Subject: [PATCH 20/33] set: allow negative ident values (#495) Signed-off-by: Maciej Obuchowski --- src/parser.rs | 10 ++++++++++ tests/sqlparser_hive.rs | 27 +++++++++++++++++++++++++++ 2 files changed, 37 insertions(+) diff --git a/src/parser.rs b/src/parser.rs index beb7394ba..d9824946e 100644 --- a/src/parser.rs +++ b/src/parser.rs @@ -3287,6 +3287,16 @@ impl<'a> Parser<'a> { let value = match (self.parse_value(), token) { (Ok(value), _) => SetVariableValue::Literal(value), (Err(_), Token::Word(ident)) => SetVariableValue::Ident(ident.to_ident()), + (Err(_), Token::Minus) => { + let next_token = self.next_token(); + match next_token { + Token::Word(ident) => SetVariableValue::Ident(Ident { + quote_style: ident.quote_style, + value: format!("-{}", ident.value), + }), + _ => self.expected("word", next_token)?, + } + } (Err(_), unexpected) => self.expected("variable value", unexpected)?, }; values.push(value); diff --git a/tests/sqlparser_hive.rs b/tests/sqlparser_hive.rs index d933f0f25..71b391a34 100644 --- a/tests/sqlparser_hive.rs +++ b/tests/sqlparser_hive.rs @@ -15,7 +15,9 @@ //! Test SQL syntax specific to Hive. The parser based on the generic dialect //! is also tested (on the inputs it can handle). 
+use sqlparser::ast::{Ident, ObjectName, SetVariableValue, Statement}; use sqlparser::dialect::HiveDialect; +use sqlparser::parser::ParserError; use sqlparser::test_utils::*; #[test] @@ -205,6 +207,31 @@ fn from_cte() { println!("{}", hive().verified_stmt(rename)); } +#[test] +fn set_statement_with_minus() { + assert_eq!( + hive().verified_stmt("SET hive.tez.java.opts = -Xmx4g"), + Statement::SetVariable { + local: false, + hivevar: false, + variable: ObjectName(vec![ + Ident::new("hive"), + Ident::new("tez"), + Ident::new("java"), + Ident::new("opts") + ]), + value: vec![SetVariableValue::Ident("-Xmx4g".into())], + } + ); + + assert_eq!( + hive().parse_sql_statements("SET hive.tez.java.opts = -"), + Err(ParserError::ParserError( + "Expected word, found: EOF".to_string() + )) + ) +} + fn hive() -> TestedDialects { TestedDialects { dialects: vec![Box::new(HiveDialect {})], From 7ab30d95b0498921cf41c6bec84c98612de22766 Mon Sep 17 00:00:00 2001 From: Maciej Obuchowski Date: Sun, 22 May 2022 21:38:24 +0200 Subject: [PATCH 21/33] hive: add support for array<> (#491) Signed-off-by: Maciej Obuchowski Co-authored-by: Andrew Lamb --- src/parser.rs | 12 ++++++++- tests/sqlparser_common.rs | 51 +++++++++++++++++++++++++++++++++++++-- 2 files changed, 60 insertions(+), 3 deletions(-) diff --git a/src/parser.rs b/src/parser.rs index d9824946e..412929cd6 100644 --- a/src/parser.rs +++ b/src/parser.rs @@ -2700,6 +2700,15 @@ impl<'a> Parser<'a> { } Keyword::ENUM => Ok(DataType::Enum(self.parse_string_values()?)), Keyword::SET => Ok(DataType::Set(self.parse_string_values()?)), + Keyword::ARRAY => { + // Hive array syntax. 
Note that nesting arrays - or other Hive syntax + // that ends with > will fail due to "C++" problem - >> is parsed as + // Token::ShiftRight + self.expect_token(&Token::Lt)?; + let inside_type = self.parse_data_type()?; + self.expect_token(&Token::Gt)?; + Ok(DataType::Array(Box::new(inside_type))) + } _ => { self.prev_token(); let type_name = self.parse_object_name()?; @@ -2709,7 +2718,8 @@ impl<'a> Parser<'a> { unexpected => self.expected("a data type name", unexpected), }?; - // Parse array data types. Note: this is postgresql-specific + // Parse array data types. Note: this is postgresql-specific and different from + // Keyword::ARRAY syntax from above while self.consume_token(&Token::LBracket) { self.expect_token(&Token::RBracket)?; data = DataType::Array(Box::new(data)) diff --git a/tests/sqlparser_common.rs b/tests/sqlparser_common.rs index 0986e407e..336084093 100644 --- a/tests/sqlparser_common.rs +++ b/tests/sqlparser_common.rs @@ -23,8 +23,8 @@ mod test_utils; use matches::assert_matches; use sqlparser::ast::*; use sqlparser::dialect::{ - AnsiDialect, BigQueryDialect, GenericDialect, MsSqlDialect, PostgreSqlDialect, SQLiteDialect, - SnowflakeDialect, + AnsiDialect, BigQueryDialect, GenericDialect, HiveDialect, MsSqlDialect, PostgreSqlDialect, + SQLiteDialect, SnowflakeDialect, }; use sqlparser::keywords::ALL_KEYWORDS; use sqlparser::parser::{Parser, ParserError}; @@ -1755,6 +1755,53 @@ fn parse_create_table() { .contains("Expected constraint details after CONSTRAINT ")); } +#[test] +fn parse_create_table_hive_array() { + // Parsing [] type arrays does not work in MsSql since [ is used in is_delimited_identifier_start + let dialects = TestedDialects { + dialects: vec![Box::new(PostgreSqlDialect {}), Box::new(HiveDialect {})], + }; + let sql = "CREATE TABLE IF NOT EXISTS something (key int, val array)"; + match dialects.one_statement_parses_to( + sql, + "CREATE TABLE IF NOT EXISTS something (key INT, val INT[])", + ) { + Statement::CreateTable { + 
if_not_exists, + name, + columns, + .. + } => { + assert!(if_not_exists); + assert_eq!(name, ObjectName(vec!["something".into()])); + assert_eq!( + columns, + vec![ + ColumnDef { + name: Ident::new("key"), + data_type: DataType::Int(None), + collation: None, + options: vec![], + }, + ColumnDef { + name: Ident::new("val"), + data_type: DataType::Array(Box::new(DataType::Int(None))), + collation: None, + options: vec![], + }, + ], + ) + } + _ => unreachable!(), + } + + let res = parse_sql_statements("CREATE TABLE IF NOT EXISTS something (key int, val array, found: )")); +} + #[test] fn parse_create_table_with_multiple_on_delete_in_constraint_fails() { parse_sql_statements( From a6d7a35dacbcfe282d1cae83916505c6294edf4e Mon Sep 17 00:00:00 2001 From: George Andronchik Date: Mon, 23 May 2022 03:44:53 +0800 Subject: [PATCH 22/33] feat: support DISCARD [ALL | PLANS | SEQUENCES | TEMPORARY | TEMP] (#500) --- src/ast/mod.rs | 29 +++++++++++++++++++++++++++++ src/keywords.rs | 2 ++ src/parser.rs | 19 +++++++++++++++++++ tests/sqlparser_common.rs | 27 +++++++++++++++++++++++++++ 4 files changed, 77 insertions(+) diff --git a/src/ast/mod.rs b/src/ast/mod.rs index 184add2ce..aaebeee38 100644 --- a/src/ast/mod.rs +++ b/src/ast/mod.rs @@ -897,6 +897,11 @@ pub enum Statement { /// deleted along with the dropped table purge: bool, }, + /// DISCARD [ ALL | PLANS | SEQUENCES | TEMPORARY | TEMP ] + /// + /// Note: this is a PostgreSQL-specific statement, + /// but may also compatible with other SQL. 
+ Discard { object_type: DiscardObject }, /// SET [ SESSION | LOCAL ] ROLE role_name /// /// Note: this is a PostgreSQL-specific statement, @@ -1561,6 +1566,10 @@ impl fmt::Display for Statement { if *cascade { " CASCADE" } else { "" }, if *purge { " PURGE" } else { "" } ), + Statement::Discard { object_type } => { + write!(f, "DISCARD {object_type}", object_type = object_type)?; + Ok(()) + } Statement::SetRole { local, session, @@ -2533,6 +2542,26 @@ impl fmt::Display for MergeClause { } } +#[derive(Debug, Clone, PartialEq, Eq, Hash)] +#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] +pub enum DiscardObject { + ALL, + PLANS, + SEQUENCES, + TEMP, +} + +impl fmt::Display for DiscardObject { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + match self { + DiscardObject::ALL => f.write_str("ALL"), + DiscardObject::PLANS => f.write_str("PLANS"), + DiscardObject::SEQUENCES => f.write_str("SEQUENCES"), + DiscardObject::TEMP => f.write_str("TEMP"), + } + } +} + #[cfg(test)] mod tests { use super::*; diff --git a/src/keywords.rs b/src/keywords.rs index 709b60bf8..b4ddb68dd 100644 --- a/src/keywords.rs +++ b/src/keywords.rs @@ -184,6 +184,7 @@ define_keywords!( DESCRIBE, DETERMINISTIC, DIRECTORY, + DISCARD, DISCONNECT, DISTINCT, DISTRIBUTE, @@ -371,6 +372,7 @@ define_keywords!( PERCENTILE_DISC, PERCENT_RANK, PERIOD, + PLANS, PORTION, POSITION, POSITION_REGEX, diff --git a/src/parser.rs b/src/parser.rs index 412929cd6..6a3c9c442 100644 --- a/src/parser.rs +++ b/src/parser.rs @@ -166,6 +166,7 @@ impl<'a> Parser<'a> { Keyword::MSCK => Ok(self.parse_msck()?), Keyword::CREATE => Ok(self.parse_create()?), Keyword::DROP => Ok(self.parse_drop()?), + Keyword::DISCARD => Ok(self.parse_discard()?), Keyword::DELETE => Ok(self.parse_delete()?), Keyword::INSERT => Ok(self.parse_insert()?), Keyword::UPDATE => Ok(self.parse_update()?), @@ -1786,6 +1787,24 @@ impl<'a> Parser<'a> { }) } + pub fn parse_discard(&mut self) -> Result { + let object_type = if 
self.parse_keyword(Keyword::ALL) { + DiscardObject::ALL + } else if self.parse_keyword(Keyword::PLANS) { + DiscardObject::PLANS + } else if self.parse_keyword(Keyword::SEQUENCES) { + DiscardObject::SEQUENCES + } else if self.parse_keyword(Keyword::TEMP) || self.parse_keyword(Keyword::TEMPORARY) { + DiscardObject::TEMP + } else { + return self.expected( + "ALL, PLANS, SEQUENCES, TEMP or TEMPORARY after DISCARD", + self.peek_token(), + ); + }; + Ok(Statement::Discard { object_type }) + } + pub fn parse_create_index(&mut self, unique: bool) -> Result { let if_not_exists = self.parse_keywords(&[Keyword::IF, Keyword::NOT, Keyword::EXISTS]); let index_name = self.parse_object_name()?; diff --git a/tests/sqlparser_common.rs b/tests/sqlparser_common.rs index 336084093..46d824175 100644 --- a/tests/sqlparser_common.rs +++ b/tests/sqlparser_common.rs @@ -4787,3 +4787,30 @@ fn parse_is_boolean() { res.unwrap_err() ); } + +#[test] +fn parse_discard() { + let sql = "DISCARD ALL"; + match verified_stmt(sql) { + Statement::Discard { object_type, .. } => assert_eq!(object_type, DiscardObject::ALL), + _ => unreachable!(), + } + + let sql = "DISCARD PLANS"; + match verified_stmt(sql) { + Statement::Discard { object_type, .. } => assert_eq!(object_type, DiscardObject::PLANS), + _ => unreachable!(), + } + + let sql = "DISCARD SEQUENCES"; + match verified_stmt(sql) { + Statement::Discard { object_type, .. } => assert_eq!(object_type, DiscardObject::SEQUENCES), + _ => unreachable!(), + } + + let sql = "DISCARD TEMP"; + match verified_stmt(sql) { + Statement::Discard { object_type, .. 
} => assert_eq!(object_type, DiscardObject::TEMP), + _ => unreachable!(), + } +} From 4070f3ec6e616be6e9249f7202338e0f05969237 Mon Sep 17 00:00:00 2001 From: Dmitry Patsura Date: Tue, 24 May 2022 16:26:50 +0300 Subject: [PATCH 23/33] feat: Convert IS TRUE|FALSE to expression (#499) --- src/ast/mod.rs | 6 ++++++ src/parser.rs | 18 +++++------------- tests/sqlparser_common.rs | 23 ++++++++++++++++------- 3 files changed, 27 insertions(+), 20 deletions(-) diff --git a/src/ast/mod.rs b/src/ast/mod.rs index aaebeee38..cc5fa6044 100644 --- a/src/ast/mod.rs +++ b/src/ast/mod.rs @@ -233,6 +233,10 @@ pub enum Expr { }, /// CompositeAccess (postgres) eg: SELECT (information_schema._pg_expandarray(array['i','i'])).n CompositeAccess { expr: Box, key: Ident }, + /// `IS FALSE` operator + IsFalse(Box), + /// `IS TRUE` operator + IsTrue(Box), /// `IS NULL` operator IsNull(Box), /// `IS NOT NULL` operator @@ -379,6 +383,8 @@ impl fmt::Display for Expr { Ok(()) } Expr::CompoundIdentifier(s) => write!(f, "{}", display_separated(s, ".")), + Expr::IsTrue(ast) => write!(f, "{} IS TRUE", ast), + Expr::IsFalse(ast) => write!(f, "{} IS FALSE", ast), Expr::IsNull(ast) => write!(f, "{} IS NULL", ast), Expr::IsNotNull(ast) => write!(f, "{} IS NOT NULL", ast), Expr::InList { diff --git a/src/parser.rs b/src/parser.rs index 6a3c9c442..effbb93ab 100644 --- a/src/parser.rs +++ b/src/parser.rs @@ -1155,6 +1155,10 @@ impl<'a> Parser<'a> { Ok(Expr::IsNull(Box::new(expr))) } else if self.parse_keywords(&[Keyword::NOT, Keyword::NULL]) { Ok(Expr::IsNotNull(Box::new(expr))) + } else if self.parse_keywords(&[Keyword::TRUE]) { + Ok(Expr::IsTrue(Box::new(expr))) + } else if self.parse_keywords(&[Keyword::FALSE]) { + Ok(Expr::IsFalse(Box::new(expr))) } else if self.parse_keywords(&[Keyword::DISTINCT, Keyword::FROM]) { let expr2 = self.parse_expr()?; Ok(Expr::IsDistinctFrom(Box::new(expr), Box::new(expr2))) @@ -1162,21 +1166,9 @@ impl<'a> Parser<'a> { { let expr2 = self.parse_expr()?; 
Ok(Expr::IsNotDistinctFrom(Box::new(expr), Box::new(expr2))) - } else if let Some(right) = - self.parse_one_of_keywords(&[Keyword::TRUE, Keyword::FALSE]) - { - let mut val = Value::Boolean(true); - if right == Keyword::FALSE { - val = Value::Boolean(false); - } - Ok(Expr::BinaryOp { - left: Box::new(expr), - op: BinaryOperator::Eq, - right: Box::new(Expr::Value(val)), - }) } else { self.expected( - "[NOT] NULL or [NOT] DISTINCT FROM TRUE FALSE after IS", + "[NOT] NULL or TRUE|FALSE or [NOT] DISTINCT FROM after IS", self.peek_token(), ) } diff --git a/tests/sqlparser_common.rs b/tests/sqlparser_common.rs index 46d824175..f09f8e93a 100644 --- a/tests/sqlparser_common.rs +++ b/tests/sqlparser_common.rs @@ -4768,21 +4768,30 @@ fn parse_position_negative() { #[test] fn parse_is_boolean() { - one_statement_parses_to( - "SELECT f from foo where field is true", - "SELECT f FROM foo WHERE field = true", + use self::Expr::*; + + let sql = "a IS FALSE"; + assert_eq!( + IsFalse(Box::new(Identifier(Ident::new("a")))), + verified_expr(sql) ); - one_statement_parses_to( - "SELECT f from foo where field is false", - "SELECT f FROM foo WHERE field = false", + let sql = "a IS TRUE"; + assert_eq!( + IsTrue(Box::new(Identifier(Ident::new("a")))), + verified_expr(sql) ); + verified_stmt("SELECT f FROM foo WHERE field IS TRUE"); + + verified_stmt("SELECT f FROM foo WHERE field IS FALSE"); + let sql = "SELECT f from foo where field is 0"; let res = parse_sql_statements(sql); assert_eq!( ParserError::ParserError( - "Expected [NOT] NULL or [NOT] DISTINCT FROM TRUE FALSE after IS, found: 0".to_string() + "Expected [NOT] NULL or TRUE|FALSE or [NOT] DISTINCT FROM after IS, found: 0" + .to_string() ), res.unwrap_err() ); From 2c0886d9fe8cc0e6fb91ba63173d021384f24473 Mon Sep 17 00:00:00 2001 From: Dmitry Patsura Date: Wed, 25 May 2022 21:42:14 +0300 Subject: [PATCH 24/33] feat: Support escaped string literals (PostgreSQL) (#502) * feat: Support escaped string literals (PostgreSQL) 
Signed-off-by: Dmitry Patsura * lint * escape ', \r, \t * Update src/ast/value.rs Co-authored-by: Andrew Lamb * Update src/tokenizer.rs Co-authored-by: Andrew Lamb * test: two slashes * remove dead code * test: parsing error * support generic dialect too (for DF) Co-authored-by: Andrew Lamb --- src/ast/value.rs | 38 ++++++++++++++++++ src/parser.rs | 10 +++++ src/tokenizer.rs | 78 +++++++++++++++++++++++++++++++++++++ tests/sqlparser_postgres.rs | 41 +++++++++++++++++++ 4 files changed, 167 insertions(+) diff --git a/src/ast/value.rs b/src/ast/value.rs index 1401855f1..8337fbaa8 100644 --- a/src/ast/value.rs +++ b/src/ast/value.rs @@ -30,6 +30,9 @@ pub enum Value { Number(BigDecimal, bool), /// 'string value' SingleQuotedString(String), + /// e'string value' (postgres extension) + /// write!(f, "{}{long}", v, long = if *l { "L" } else { "" }), Value::DoubleQuotedString(v) => write!(f, "\"{}\"", v), Value::SingleQuotedString(v) => write!(f, "'{}'", escape_single_quote_string(v)), + Value::EscapedStringLiteral(v) => write!(f, "E'{}'", escape_escaped_string(v)), Value::NationalStringLiteral(v) => write!(f, "N'{}'", v), Value::HexStringLiteral(v) => write!(f, "X'{}'", v), Value::Boolean(v) => write!(f, "{}", v), @@ -193,6 +197,40 @@ pub fn escape_single_quote_string(s: &str) -> EscapeSingleQuoteString<'_> { EscapeSingleQuoteString(s) } +pub struct EscapeEscapedStringLiteral<'a>(&'a str); + +impl<'a> fmt::Display for EscapeEscapedStringLiteral<'a> { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + for c in self.0.chars() { + match c { + '\'' => { + write!(f, r#"\'"#)?; + } + '\\' => { + write!(f, r#"\\"#)?; + } + '\n' => { + write!(f, r#"\n"#)?; + } + '\t' => { + write!(f, r#"\t"#)?; + } + '\r' => { + write!(f, r#"\r"#)?; + } + _ => { + write!(f, "{}", c)?; + } + } + } + Ok(()) + } +} + +pub fn escape_escaped_string(s: &str) -> EscapeEscapedStringLiteral<'_> { + EscapeEscapedStringLiteral(s) +} + #[derive(Debug, Clone, PartialEq, Eq, Hash)] #[cfg_attr(feature 
= "serde", derive(Serialize, Deserialize))] pub enum TrimWhereField { diff --git a/src/parser.rs b/src/parser.rs index effbb93ab..b55ed6d55 100644 --- a/src/parser.rs +++ b/src/parser.rs @@ -497,6 +497,11 @@ impl<'a> Parser<'a> { expr: Box::new(self.parse_subexpr(Self::PLUS_MINUS_PREC)?), }) } + Token::EscapedStringLiteral(_) if dialect_of!(self is PostgreSqlDialect | GenericDialect) => + { + self.prev_token(); + Ok(Expr::Value(self.parse_value()?)) + } Token::Number(_, _) | Token::SingleQuotedString(_) | Token::NationalStringLiteral(_) @@ -902,6 +907,7 @@ impl<'a> Parser<'a> { None } Token::SingleQuotedString(_) + | Token::EscapedStringLiteral(_) | Token::NationalStringLiteral(_) | Token::HexStringLiteral(_) => Some(Box::new(self.parse_expr()?)), unexpected => { @@ -2576,6 +2582,7 @@ impl<'a> Parser<'a> { }, Token::SingleQuotedString(ref s) => Ok(Value::SingleQuotedString(s.to_string())), Token::NationalStringLiteral(ref s) => Ok(Value::NationalStringLiteral(s.to_string())), + Token::EscapedStringLiteral(ref s) => Ok(Value::EscapedStringLiteral(s.to_string())), Token::HexStringLiteral(ref s) => Ok(Value::HexStringLiteral(s.to_string())), Token::Placeholder(ref s) => Ok(Value::Placeholder(s.to_string())), unexpected => self.expected("a value", unexpected), @@ -2607,6 +2614,9 @@ impl<'a> Parser<'a> { match self.next_token() { Token::Word(Word { value, keyword, .. 
}) if keyword == Keyword::NoKeyword => Ok(value), Token::SingleQuotedString(s) => Ok(s), + Token::EscapedStringLiteral(s) if dialect_of!(self is PostgreSqlDialect | GenericDialect) => { + Ok(s) + } unexpected => self.expected("literal string", unexpected), } } diff --git a/src/tokenizer.rs b/src/tokenizer.rs index 3fb0f66b2..91cb16a80 100644 --- a/src/tokenizer.rs +++ b/src/tokenizer.rs @@ -51,6 +51,8 @@ pub enum Token { SingleQuotedString(String), /// "National" string literal: i.e: N'string' NationalStringLiteral(String), + /// "escaped" string literal, which are an extension to the SQL standard: i.e: e'first \n second' or E 'first \n second' + EscapedStringLiteral(String), /// Hexadecimal string literal: i.e.: X'deadbeef' HexStringLiteral(String), /// Comma @@ -160,6 +162,7 @@ impl fmt::Display for Token { Token::Char(ref c) => write!(f, "{}", c), Token::SingleQuotedString(ref s) => write!(f, "'{}'", s), Token::NationalStringLiteral(ref s) => write!(f, "N'{}'", s), + Token::EscapedStringLiteral(ref s) => write!(f, "E'{}'", s), Token::HexStringLiteral(ref s) => write!(f, "X'{}'", s), Token::Comma => f.write_str(","), Token::Whitespace(ws) => write!(f, "{}", ws), @@ -392,6 +395,21 @@ impl<'a> Tokenizer<'a> { } } } + // PostgreSQL accepts "escape" string constants, which are an extension to the SQL standard. + x @ 'e' | x @ 'E' => { + chars.next(); // consume, to check the next char + match chars.peek() { + Some('\'') => { + let s = self.tokenize_escaped_single_quoted_string(chars)?; + Ok(Some(Token::EscapedStringLiteral(s))) + } + _ => { + // regular identifier starting with an "E" or "e" + let s = self.tokenize_word(x, chars); + Ok(Some(Token::make_word(&s, None))) + } + } + } // The spec only allows an uppercase 'X' to introduce a hex // string, but PostgreSQL, at least, allows a lowercase 'x' too. x @ 'x' | x @ 'X' => { @@ -690,6 +708,66 @@ impl<'a> Tokenizer<'a> { s } + /// Read a single quoted string, starting with the opening quote. 
+ fn tokenize_escaped_single_quoted_string( + &self, + chars: &mut Peekable>, + ) -> Result { + let mut s = String::new(); + chars.next(); // consume the opening quote + + // slash escaping + let mut is_escaped = false; + while let Some(&ch) = chars.peek() { + macro_rules! escape_control_character { + ($ESCAPED:expr) => {{ + if is_escaped { + s.push($ESCAPED); + is_escaped = false; + } else { + s.push(ch); + } + + chars.next(); + }}; + } + + match ch { + '\'' => { + chars.next(); // consume + if is_escaped { + s.push(ch); + is_escaped = false; + } else if chars.peek().map(|c| *c == '\'').unwrap_or(false) { + s.push(ch); + chars.next(); + } else { + return Ok(s); + } + } + '\\' => { + if is_escaped { + s.push('\\'); + is_escaped = false; + } else { + is_escaped = true; + } + + chars.next(); + } + 'r' => escape_control_character!('\r'), + 'n' => escape_control_character!('\n'), + 't' => escape_control_character!('\t'), + _ => { + is_escaped = false; + chars.next(); // consume + s.push(ch); + } + } + } + self.tokenizer_error("Unterminated encoded string literal") + } + /// Read a single quoted string, starting with the opening quote. 
fn tokenize_single_quoted_string( &self, diff --git a/tests/sqlparser_postgres.rs b/tests/sqlparser_postgres.rs index 4fee8083a..b6c8255be 100644 --- a/tests/sqlparser_postgres.rs +++ b/tests/sqlparser_postgres.rs @@ -1467,3 +1467,44 @@ fn pg_and_generic() -> TestedDialects { dialects: vec![Box::new(PostgreSqlDialect {}), Box::new(GenericDialect {})], } } + +#[test] +fn parse_escaped_literal_string() { + let sql = + r#"SELECT E's1 \n s1', E's2 \\n s2', E's3 \\\n s3', E's4 \\\\n s4', E'\'', E'foo \\'"#; + let select = pg_and_generic().verified_only_select(sql); + assert_eq!(6, select.projection.len()); + assert_eq!( + &Expr::Value(Value::EscapedStringLiteral("s1 \n s1".to_string())), + expr_from_projection(&select.projection[0]) + ); + assert_eq!( + &Expr::Value(Value::EscapedStringLiteral("s2 \\n s2".to_string())), + expr_from_projection(&select.projection[1]) + ); + assert_eq!( + &Expr::Value(Value::EscapedStringLiteral("s3 \\\n s3".to_string())), + expr_from_projection(&select.projection[2]) + ); + assert_eq!( + &Expr::Value(Value::EscapedStringLiteral("s4 \\\\n s4".to_string())), + expr_from_projection(&select.projection[3]) + ); + assert_eq!( + &Expr::Value(Value::EscapedStringLiteral("'".to_string())), + expr_from_projection(&select.projection[4]) + ); + assert_eq!( + &Expr::Value(Value::EscapedStringLiteral("foo \\".to_string())), + expr_from_projection(&select.projection[5]) + ); + + let sql = r#"SELECT E'\'"#; + assert_eq!( + pg_and_generic() + .parse_sql_statements(sql) + .unwrap_err() + .to_string(), + "sql parser error: Unterminated encoded string literal at Line: 1, Column 8" + ); +} From 09bdb6bb8a1587913c5eeb9298a32b11a18761d4 Mon Sep 17 00:00:00 2001 From: mao <50707849+step-baby@users.noreply.github.com> Date: Thu, 26 May 2022 03:40:30 +0800 Subject: [PATCH 25/33] Support placeholders (`$` or `?`) in `LIMIT` clause (#494) * fix: limit $1 * feat: test limit $1 * Update Cargo.toml * cargo fmt Co-authored-by: Andrew Lamb --- src/parser.rs | 37 
++++++++++++++++++++++++++++++++++++- 1 file changed, 36 insertions(+), 1 deletion(-) diff --git a/src/parser.rs b/src/parser.rs index b55ed6d55..2d6707443 100644 --- a/src/parser.rs +++ b/src/parser.rs @@ -2592,6 +2592,7 @@ impl<'a> Parser<'a> { pub fn parse_number_value(&mut self) -> Result { match self.parse_value()? { v @ Value::Number(_, _) => Ok(v), + v @ Value::Placeholder(_) => Ok(v), _ => { self.prev_token(); self.expected("literal number", self.peek_token()) @@ -4356,7 +4357,7 @@ impl Word { #[cfg(test)] mod tests { use super::*; - use crate::test_utils::all_dialects; + use crate::test_utils::{all_dialects, TestedDialects}; #[test] fn test_prev_index() { @@ -4378,4 +4379,38 @@ mod tests { parser.prev_token(); }); } + + #[test] + fn test_parse_limit() { + let sql = "SELECT * FROM user LIMIT 1"; + all_dialects().run_parser_method(sql, |parser| { + let ast = parser.parse_query().unwrap(); + assert_eq!(ast.to_string(), sql.to_string()); + }); + + let sql = "SELECT * FROM user LIMIT $1 OFFSET $2"; + let dialects = TestedDialects { + dialects: vec![ + Box::new(PostgreSqlDialect {}), + Box::new(ClickHouseDialect {}), + Box::new(GenericDialect {}), + Box::new(MsSqlDialect {}), + Box::new(SnowflakeDialect {}), + ], + }; + + dialects.run_parser_method(sql, |parser| { + let ast = parser.parse_query().unwrap(); + assert_eq!(ast.to_string(), sql.to_string()); + }); + + let sql = "SELECT * FROM user LIMIT ? 
OFFSET ?"; + let dialects = TestedDialects { + dialects: vec![Box::new(MySqlDialect {})], + }; + dialects.run_parser_method(sql, |parser| { + let ast = parser.parse_query().unwrap(); + assert_eq!(ast.to_string(), sql.to_string()); + }); + } } From cd66034a4ae4559564f820ed7f8542f1495674cf Mon Sep 17 00:00:00 2001 From: Maciej Skrzypkowski Date: Wed, 25 May 2022 21:48:15 +0200 Subject: [PATCH 26/33] MERGE INTO semicolon handling (#508) * SGA-4181 MERGE INTO semicolon handling * fixed lint warning Co-authored-by: Maciej Skrzypkowski --- src/parser.rs | 2 +- tests/sqlparser_common.rs | 15 +++++++++++++++ 2 files changed, 16 insertions(+), 1 deletion(-) diff --git a/src/parser.rs b/src/parser.rs index 2d6707443..35a8dc7fb 100644 --- a/src/parser.rs +++ b/src/parser.rs @@ -4250,7 +4250,7 @@ impl<'a> Parser<'a> { pub fn parse_merge_clauses(&mut self) -> Result, ParserError> { let mut clauses: Vec = vec![]; loop { - if self.peek_token() == Token::EOF { + if self.peek_token() == Token::EOF || self.peek_token() == Token::SemiColon { break; } self.expect_keyword(Keyword::WHEN)?; diff --git a/tests/sqlparser_common.rs b/tests/sqlparser_common.rs index f09f8e93a..c5140d475 100644 --- a/tests/sqlparser_common.rs +++ b/tests/sqlparser_common.rs @@ -4574,6 +4574,21 @@ fn test_merge_into_using_table() { verified_stmt(sql); } +#[test] +fn test_merge_with_delimiter() { + let sql = "MERGE INTO target_table USING source_table \ + ON target_table.id = source_table.oooid \ + WHEN MATCHED THEN \ + UPDATE SET target_table.description = source_table.description \ + WHEN NOT MATCHED THEN \ + INSERT (ID, description) VALUES (source_table.id, source_table.description);"; + + match parse_sql_statements(sql) { + Ok(_) => {} + _ => unreachable!(), + } +} + #[test] fn test_lock() { let sql = "SELECT * FROM student WHERE id = '1' FOR UPDATE"; From 901f5b974faaeb2b78254d184953b5ad9591b3b4 Mon Sep 17 00:00:00 2001 From: Riccardo Azzolini <103407078+razzolini-qpq@users.noreply.github.com> Date: Wed, 
25 May 2022 22:01:13 +0200 Subject: [PATCH 27/33] Distinguish between tables and nullary functions in FROM (#506) --- src/ast/query.rs | 8 ++++++-- src/parser.rs | 4 ++-- src/test_utils.rs | 2 +- tests/sqlparser_bigquery.rs | 2 +- tests/sqlparser_common.rs | 36 ++++++++++++++++++++--------------- tests/sqlparser_mysql.rs | 6 +++--- tests/sqlparser_postgres.rs | 4 ++-- tests/sqlparser_redshift.rs | 4 ++-- tests/sqpparser_clickhouse.rs | 2 +- 9 files changed, 39 insertions(+), 29 deletions(-) diff --git a/src/ast/query.rs b/src/ast/query.rs index 472b9e6ba..07295f44f 100644 --- a/src/ast/query.rs +++ b/src/ast/query.rs @@ -337,7 +337,11 @@ pub enum TableFactor { /// Arguments of a table-valued function, as supported by Postgres /// and MSSQL. Note that deprecated MSSQL `FROM foo (NOLOCK)` syntax /// will also be parsed as `args`. - args: Vec, + /// + /// This field's value is `Some(v)`, where `v` is a (possibly empty) + /// vector of arguments, in the case of a table-valued function call, + /// whereas it's `None` in the case of a regular table name. + args: Option>, /// MSSQL-specific `WITH (...)` hints such as NOLOCK. with_hints: Vec, }, @@ -370,7 +374,7 @@ impl fmt::Display for TableFactor { with_hints, } => { write!(f, "{}", name)?; - if !args.is_empty() { + if let Some(args) = args { write!(f, "({})", display_comma_separated(args))?; } if let Some(alias) = alias { diff --git a/src/parser.rs b/src/parser.rs index 35a8dc7fb..b99d4999d 100644 --- a/src/parser.rs +++ b/src/parser.rs @@ -3623,9 +3623,9 @@ impl<'a> Parser<'a> { let name = self.parse_object_name()?; // Postgres, MSSQL: table-valued functions: let args = if self.consume_token(&Token::LParen) { - self.parse_optional_args()? + Some(self.parse_optional_args()?) 
} else { - vec![] + None }; let alias = self.parse_optional_table_alias(keywords::RESERVED_FOR_TABLE_ALIAS)?; // MSSQL-specific table hints: diff --git a/src/test_utils.rs b/src/test_utils.rs index 1a432e47a..deb3e2580 100644 --- a/src/test_utils.rs +++ b/src/test_utils.rs @@ -177,7 +177,7 @@ pub fn table(name: impl Into) -> TableFactor { TableFactor::Table { name: ObjectName(vec![Ident::new(name.into())]), alias: None, - args: vec![], + args: None, with_hints: vec![], } } diff --git a/tests/sqlparser_bigquery.rs b/tests/sqlparser_bigquery.rs index 380530199..2b49abf6f 100644 --- a/tests/sqlparser_bigquery.rs +++ b/tests/sqlparser_bigquery.rs @@ -29,7 +29,7 @@ fn parse_table_identifiers() { relation: TableFactor::Table { name: ObjectName(expected), alias: None, - args: vec![], + args: None, with_hints: vec![], }, joins: vec![] diff --git a/tests/sqlparser_common.rs b/tests/sqlparser_common.rs index c5140d475..760f1d8c4 100644 --- a/tests/sqlparser_common.rs +++ b/tests/sqlparser_common.rs @@ -200,7 +200,7 @@ fn parse_update_with_table_alias() { name: Ident::new("u"), columns: vec![] }), - args: vec![], + args: None, with_hints: vec![], }, joins: vec![] @@ -2793,7 +2793,7 @@ fn parse_delimited_identifiers() { } => { assert_eq!(vec![Ident::with_quote('"', "a table")], name.0); assert_eq!(Ident::with_quote('"', "alias"), alias.unwrap().name); - assert!(args.is_empty()); + assert!(args.is_none()); assert!(with_hints.is_empty()); } _ => panic!("Expecting TableFactor::Table"), @@ -2912,6 +2912,12 @@ fn parse_from_advanced() { let _select = verified_only_select(sql); } +#[test] +fn parse_nullary_table_valued_function() { + let sql = "SELECT * FROM fn()"; + let _select = verified_only_select(sql); +} + #[test] fn parse_implicit_join() { let sql = "SELECT * FROM t1, t2"; @@ -2922,7 +2928,7 @@ fn parse_implicit_join() { relation: TableFactor::Table { name: ObjectName(vec!["t1".into()]), alias: None, - args: vec![], + args: None, with_hints: vec![], }, joins: vec![], @@ 
-2931,7 +2937,7 @@ fn parse_implicit_join() { relation: TableFactor::Table { name: ObjectName(vec!["t2".into()]), alias: None, - args: vec![], + args: None, with_hints: vec![], }, joins: vec![], @@ -2948,14 +2954,14 @@ fn parse_implicit_join() { relation: TableFactor::Table { name: ObjectName(vec!["t1a".into()]), alias: None, - args: vec![], + args: None, with_hints: vec![], }, joins: vec![Join { relation: TableFactor::Table { name: ObjectName(vec!["t1b".into()]), alias: None, - args: vec![], + args: None, with_hints: vec![], }, join_operator: JoinOperator::Inner(JoinConstraint::Natural), @@ -2965,14 +2971,14 @@ fn parse_implicit_join() { relation: TableFactor::Table { name: ObjectName(vec!["t2a".into()]), alias: None, - args: vec![], + args: None, with_hints: vec![], }, joins: vec![Join { relation: TableFactor::Table { name: ObjectName(vec!["t2b".into()]), alias: None, - args: vec![], + args: None, with_hints: vec![], }, join_operator: JoinOperator::Inner(JoinConstraint::Natural), @@ -2992,7 +2998,7 @@ fn parse_cross_join() { relation: TableFactor::Table { name: ObjectName(vec![Ident::new("t2")]), alias: None, - args: vec![], + args: None, with_hints: vec![], }, join_operator: JoinOperator::CrossJoin @@ -3012,7 +3018,7 @@ fn parse_joins_on() { relation: TableFactor::Table { name: ObjectName(vec![Ident::new(relation.into())]), alias, - args: vec![], + args: None, with_hints: vec![], }, join_operator: f(JoinConstraint::On(Expr::BinaryOp { @@ -3065,7 +3071,7 @@ fn parse_joins_using() { relation: TableFactor::Table { name: ObjectName(vec![Ident::new(relation.into())]), alias, - args: vec![], + args: None, with_hints: vec![], }, join_operator: f(JoinConstraint::Using(vec!["c1".into()])), @@ -3110,7 +3116,7 @@ fn parse_natural_join() { relation: TableFactor::Table { name: ObjectName(vec![Ident::new("t2")]), alias: None, - args: vec![], + args: None, with_hints: vec![], }, join_operator: f(JoinConstraint::Natural), @@ -3348,7 +3354,7 @@ fn parse_derived_tables() { 
relation: TableFactor::Table { name: ObjectName(vec!["t2".into()]), alias: None, - args: vec![], + args: None, with_hints: vec![], }, join_operator: JoinOperator::Inner(JoinConstraint::Natural), @@ -4431,7 +4437,7 @@ fn parse_merge() { name: Ident::new("dest"), columns: vec![] }), - args: vec![], + args: None, with_hints: vec![] } ); @@ -4452,7 +4458,7 @@ fn parse_merge() { relation: TableFactor::Table { name: ObjectName(vec![Ident::new("s"), Ident::new("foo")]), alias: None, - args: vec![], + args: None, with_hints: vec![] }, joins: vec![] diff --git a/tests/sqlparser_mysql.rs b/tests/sqlparser_mysql.rs index ac265875d..9a7117a4a 100644 --- a/tests/sqlparser_mysql.rs +++ b/tests/sqlparser_mysql.rs @@ -626,7 +626,7 @@ fn parse_update_with_joins() { name: Ident::new("o"), columns: vec![] }), - args: vec![], + args: None, with_hints: vec![], }, joins: vec![Join { @@ -636,7 +636,7 @@ fn parse_update_with_joins() { name: Ident::new("c"), columns: vec![] }), - args: vec![], + args: None, with_hints: vec![], }, join_operator: JoinOperator::Inner(JoinConstraint::On(Expr::BinaryOp { @@ -743,7 +743,7 @@ fn parse_substring_in_select() { quote_style: None }]), alias: None, - args: vec![], + args: None, with_hints: vec![] }, joins: vec![] diff --git a/tests/sqlparser_postgres.rs b/tests/sqlparser_postgres.rs index b6c8255be..683ab55e6 100644 --- a/tests/sqlparser_postgres.rs +++ b/tests/sqlparser_postgres.rs @@ -413,7 +413,7 @@ fn parse_update_set_from() { relation: TableFactor::Table { name: ObjectName(vec![Ident::new("t1")]), alias: None, - args: vec![], + args: None, with_hints: vec![], }, joins: vec![], @@ -439,7 +439,7 @@ fn parse_update_set_from() { relation: TableFactor::Table { name: ObjectName(vec![Ident::new("t1")]), alias: None, - args: vec![], + args: None, with_hints: vec![], }, joins: vec![], diff --git a/tests/sqlparser_redshift.rs b/tests/sqlparser_redshift.rs index ce818a98d..6f77cf335 100644 --- a/tests/sqlparser_redshift.rs +++ b/tests/sqlparser_redshift.rs 
@@ -43,7 +43,7 @@ fn test_square_brackets_over_db_schema_table_name() { } ]), alias: None, - args: vec![], + args: None, with_hints: vec![], }, joins: vec![], @@ -87,7 +87,7 @@ fn test_double_quotes_over_db_schema_table_name() { } ]), alias: None, - args: vec![], + args: None, with_hints: vec![], }, joins: vec![], diff --git a/tests/sqpparser_clickhouse.rs b/tests/sqpparser_clickhouse.rs index ab8c28f21..72a1a0556 100644 --- a/tests/sqpparser_clickhouse.rs +++ b/tests/sqpparser_clickhouse.rs @@ -58,7 +58,7 @@ fn parse_map_access_expr() { relation: Table { name: ObjectName(vec![Ident::new("foos")]), alias: None, - args: vec![], + args: None, with_hints: vec![], }, joins: vec![] From 0fa812bd2bb6072a04e42ffce7f10e1fce8b7c24 Mon Sep 17 00:00:00 2001 From: Riccardo Azzolini <103407078+razzolini-qpq@users.noreply.github.com> Date: Wed, 25 May 2022 22:10:38 +0200 Subject: [PATCH 28/33] Fix parsing of COLLATE after parentheses in expressions (#507) --- src/parser.rs | 21 +++++++++++---------- tests/sqlparser_common.rs | 9 +++++++++ 2 files changed, 20 insertions(+), 10 deletions(-) diff --git a/src/parser.rs b/src/parser.rs index b99d4999d..02a739954 100644 --- a/src/parser.rs +++ b/src/parser.rs @@ -525,17 +525,18 @@ impl<'a> Parser<'a> { }; self.expect_token(&Token::RParen)?; if !self.consume_token(&Token::Period) { - return Ok(expr); + Ok(expr) + } else { + let tok = self.next_token(); + let key = match tok { + Token::Word(word) => word.to_ident(), + _ => return parser_err!(format!("Expected identifier, found: {}", tok)), + }; + Ok(Expr::CompositeAccess { + expr: Box::new(expr), + key, + }) } - let tok = self.next_token(); - let key = match tok { - Token::Word(word) => word.to_ident(), - _ => return parser_err!(format!("Expected identifier, found: {}", tok)), - }; - Ok(Expr::CompositeAccess { - expr: Box::new(expr), - key, - }) } Token::Placeholder(_) => { self.prev_token(); diff --git a/tests/sqlparser_common.rs b/tests/sqlparser_common.rs index 760f1d8c4..e931cd96f 
100644 --- a/tests/sqlparser_common.rs +++ b/tests/sqlparser_common.rs @@ -561,6 +561,15 @@ fn parse_collate() { ); } +#[test] +fn parse_collate_after_parens() { + let sql = "SELECT (name) COLLATE \"de_DE\" FROM customer"; + assert_matches!( + only(&all_dialects().verified_only_select(sql).projection), + SelectItem::UnnamedExpr(Expr::Collate { .. }) + ); +} + #[test] fn parse_select_string_predicate() { let sql = "SELECT id, fname, lname FROM customer \ From cc2559c09772d8f9f487103f7cf76094e3d0c9cd Mon Sep 17 00:00:00 2001 From: Maciej Obuchowski Date: Thu, 26 May 2022 23:30:10 +0200 Subject: [PATCH 29/33] hive: add create function syntax (#496) Signed-off-by: Maciej Obuchowski --- src/ast/mod.rs | 44 ++++++++++++++++++++++++++++++++++++++++ src/keywords.rs | 3 +++ src/parser.rs | 38 ++++++++++++++++++++++++++++++++++ tests/sqlparser_hive.rs | 45 +++++++++++++++++++++++++++++++++++++++-- 4 files changed, 128 insertions(+), 2 deletions(-) diff --git a/src/ast/mod.rs b/src/ast/mod.rs index cc5fa6044..f3ef319e8 100644 --- a/src/ast/mod.rs +++ b/src/ast/mod.rs @@ -981,6 +981,15 @@ pub enum Statement { location: Option, managed_location: Option, }, + /// CREATE FUNCTION + /// + /// Hive: https://cwiki.apache.org/confluence/display/hive/languagemanual+ddl#LanguageManualDDL-Create/Drop/ReloadFunction + CreateFunction { + temporary: bool, + name: ObjectName, + class_name: String, + using: Option, + }, /// `ASSERT [AS ]` Assert { condition: Expr, @@ -1320,6 +1329,22 @@ impl fmt::Display for Statement { } Ok(()) } + Statement::CreateFunction { + temporary, + name, + class_name, + using, + } => { + write!( + f, + "CREATE {temp}FUNCTION {name} AS '{class_name}'", + temp = if *temporary { "TEMPORARY " } else { "" }, + )?; + if let Some(u) = using { + write!(f, " {}", u)?; + } + Ok(()) + } Statement::CreateView { name, or_replace, @@ -2568,6 +2593,25 @@ impl fmt::Display for DiscardObject { } } +#[derive(Debug, Clone, PartialEq, Eq, Hash)] +#[cfg_attr(feature = "serde", 
derive(Serialize, Deserialize))] +pub enum CreateFunctionUsing { + Jar(String), + File(String), + Archive(String), +} + +impl fmt::Display for CreateFunctionUsing { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + write!(f, "USING ")?; + match self { + CreateFunctionUsing::Jar(uri) => write!(f, "JAR '{uri}'"), + CreateFunctionUsing::File(uri) => write!(f, "FILE '{uri}'"), + CreateFunctionUsing::Archive(uri) => write!(f, "ARCHIVE '{uri}'"), + } + } +} + #[cfg(test)] mod tests { use super::*; diff --git a/src/keywords.rs b/src/keywords.rs index b4ddb68dd..d739aecd3 100644 --- a/src/keywords.rs +++ b/src/keywords.rs @@ -76,6 +76,7 @@ define_keywords!( AND, ANY, APPLY, + ARCHIVE, ARE, ARRAY, ARRAY_AGG, @@ -223,6 +224,7 @@ define_keywords!( FALSE, FETCH, FIELDS, + FILE, FILTER, FIRST, FIRST_VALUE, @@ -277,6 +279,7 @@ define_keywords!( ISODOW, ISOLATION, ISOYEAR, + JAR, JOIN, JSONFILE, JULIAN, diff --git a/src/parser.rs b/src/parser.rs index 02a739954..23fd79abb 100644 --- a/src/parser.rs +++ b/src/parser.rs @@ -1615,6 +1615,8 @@ impl<'a> Parser<'a> { self.parse_create_schema() } else if self.parse_keyword(Keyword::DATABASE) { self.parse_create_database() + } else if dialect_of!(self is HiveDialect) && self.parse_keyword(Keyword::FUNCTION) { + self.parse_create_function(temporary) } else { self.expected("an object type after CREATE", self.peek_token()) } @@ -1671,6 +1673,42 @@ impl<'a> Parser<'a> { }) } + pub fn parse_optional_create_function_using( + &mut self, + ) -> Result, ParserError> { + if !self.parse_keyword(Keyword::USING) { + return Ok(None); + }; + let keyword = + self.expect_one_of_keywords(&[Keyword::JAR, Keyword::FILE, Keyword::ARCHIVE])?; + + let uri = self.parse_literal_string()?; + + match keyword { + Keyword::JAR => Ok(Some(CreateFunctionUsing::Jar(uri))), + Keyword::FILE => Ok(Some(CreateFunctionUsing::File(uri))), + Keyword::ARCHIVE => Ok(Some(CreateFunctionUsing::Archive(uri))), + _ => self.expected( + "JAR, FILE or ARCHIVE, got {:?}", + 
Token::make_keyword(format!("{:?}", keyword).as_str()), + ), + } + } + + pub fn parse_create_function(&mut self, temporary: bool) -> Result { + let name = self.parse_object_name()?; + self.expect_keyword(Keyword::AS)?; + let class_name = self.parse_literal_string()?; + let using = self.parse_optional_create_function_using()?; + + Ok(Statement::CreateFunction { + temporary, + name, + class_name, + using, + }) + } + pub fn parse_create_external_table( &mut self, or_replace: bool, diff --git a/tests/sqlparser_hive.rs b/tests/sqlparser_hive.rs index 71b391a34..fa2486120 100644 --- a/tests/sqlparser_hive.rs +++ b/tests/sqlparser_hive.rs @@ -15,8 +15,8 @@ //! Test SQL syntax specific to Hive. The parser based on the generic dialect //! is also tested (on the inputs it can handle). -use sqlparser::ast::{Ident, ObjectName, SetVariableValue, Statement}; -use sqlparser::dialect::HiveDialect; +use sqlparser::ast::{CreateFunctionUsing, Ident, ObjectName, SetVariableValue, Statement}; +use sqlparser::dialect::{GenericDialect, HiveDialect}; use sqlparser::parser::ParserError; use sqlparser::test_utils::*; @@ -232,6 +232,47 @@ fn set_statement_with_minus() { ) } +#[test] +fn parse_create_function() { + let sql = "CREATE TEMPORARY FUNCTION mydb.myfunc AS 'org.random.class.Name' USING JAR 'hdfs://somewhere.com:8020/very/far'"; + match hive().verified_stmt(sql) { + Statement::CreateFunction { + temporary, + name, + class_name, + using, + } => { + assert!(temporary); + assert_eq!("mydb.myfunc", name.to_string()); + assert_eq!("org.random.class.Name", class_name); + assert_eq!( + using, + Some(CreateFunctionUsing::Jar( + "hdfs://somewhere.com:8020/very/far".to_string() + )) + ) + } + _ => unreachable!(), + } + + let generic = TestedDialects { + dialects: vec![Box::new(GenericDialect {})], + }; + + assert_eq!( + generic.parse_sql_statements(sql).unwrap_err(), + ParserError::ParserError( + "Expected an object type after CREATE, found: FUNCTION".to_string() + ) + ); + + let sql = "CREATE 
TEMPORARY FUNCTION mydb.myfunc AS 'org.random.class.Name' USING JAR"; + assert_eq!( + hive().parse_sql_statements(sql).unwrap_err(), + ParserError::ParserError("Expected literal string, found: EOF".to_string()), + ); +} + fn hive() -> TestedDialects { TestedDialects { dialects: vec![Box::new(HiveDialect {})], From d19c6c323cc64ea59b8a1cc4b45a5a820b6f14c1 Mon Sep 17 00:00:00 2001 From: Riccardo Azzolini <103407078+razzolini-qpq@users.noreply.github.com> Date: Fri, 27 May 2022 12:25:24 +0200 Subject: [PATCH 30/33] Fix escaping of trailing quote in quoted identifiers (#505) * Generalize EscapeSingleQuoteString to arbitrary quote character * Fix escaping of trailing quote in quoted identifiers * Add new tests instead of modifying existing tests --- src/ast/mod.rs | 14 +++----------- src/ast/value.rs | 21 ++++++++++++++------- tests/sqlparser_mysql.rs | 34 ++++++++++++++++++++++++++++++++++ tests/sqlparser_postgres.rs | 5 +++++ 4 files changed, 56 insertions(+), 18 deletions(-) diff --git a/src/ast/mod.rs b/src/ast/mod.rs index f3ef319e8..e31701470 100644 --- a/src/ast/mod.rs +++ b/src/ast/mod.rs @@ -23,7 +23,7 @@ use alloc::{ string::{String, ToString}, vec::Vec, }; -use core::fmt::{self, Write}; +use core::fmt; #[cfg(feature = "serde")] use serde::{Deserialize, Serialize}; @@ -128,16 +128,8 @@ impl fmt::Display for Ident { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { match self.quote_style { Some(q) if q == '"' || q == '\'' || q == '`' => { - f.write_char(q)?; - let mut first = true; - for s in self.value.split_inclusive(q) { - if !first { - f.write_char(q)?; - } - first = false; - f.write_str(s)?; - } - f.write_char(q) + let escaped = value::escape_quoted_string(&self.value, q); + write!(f, "{}{}{}", q, escaped, q) } Some(q) if q == '[' => write!(f, "[{}]", self.value), None => f.write_str(&self.value), diff --git a/src/ast/value.rs b/src/ast/value.rs index 8337fbaa8..9c32f27d5 100644 --- a/src/ast/value.rs +++ b/src/ast/value.rs @@ -178,13 +178,16 @@ impl 
fmt::Display for DateTimeField { } } -pub struct EscapeSingleQuoteString<'a>(&'a str); +pub struct EscapeQuotedString<'a> { + string: &'a str, + quote: char, +} -impl<'a> fmt::Display for EscapeSingleQuoteString<'a> { +impl<'a> fmt::Display for EscapeQuotedString<'a> { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { - for c in self.0.chars() { - if c == '\'' { - write!(f, "\'\'")?; + for c in self.string.chars() { + if c == self.quote { + write!(f, "{q}{q}", q = self.quote)?; } else { write!(f, "{}", c)?; } @@ -193,8 +196,12 @@ impl<'a> fmt::Display for EscapeSingleQuoteString<'a> { } } -pub fn escape_single_quote_string(s: &str) -> EscapeSingleQuoteString<'_> { - EscapeSingleQuoteString(s) +pub fn escape_quoted_string(string: &str, quote: char) -> EscapeQuotedString<'_> { + EscapeQuotedString { string, quote } +} + +pub fn escape_single_quote_string(s: &str) -> EscapeQuotedString<'_> { + escape_quoted_string(s, '\'') } pub struct EscapeEscapedStringLiteral<'a>(&'a str); diff --git a/tests/sqlparser_mysql.rs b/tests/sqlparser_mysql.rs index 9a7117a4a..10aa803f6 100644 --- a/tests/sqlparser_mysql.rs +++ b/tests/sqlparser_mysql.rs @@ -325,6 +325,40 @@ fn parse_quote_identifiers_2() { ); } +#[test] +fn parse_quote_identifiers_3() { + let sql = "SELECT ```quoted identifier```"; + assert_eq!( + mysql().verified_stmt(sql), + Statement::Query(Box::new(Query { + with: None, + body: SetExpr::Select(Box::new(Select { + distinct: false, + top: None, + projection: vec![SelectItem::UnnamedExpr(Expr::Identifier(Ident { + value: "`quoted identifier`".into(), + quote_style: Some('`'), + }))], + into: None, + from: vec![], + lateral_views: vec![], + selection: None, + group_by: vec![], + cluster_by: vec![], + distribute_by: vec![], + sort_by: vec![], + having: None, + qualify: None + })), + order_by: vec![], + limit: None, + offset: None, + fetch: None, + lock: None, + })) + ); +} + #[test] fn parse_unterminated_escape() { let sql = r#"SELECT 'I\'m not fine\'"#; diff --git 
a/tests/sqlparser_postgres.rs b/tests/sqlparser_postgres.rs index 683ab55e6..676ac6a8a 100644 --- a/tests/sqlparser_postgres.rs +++ b/tests/sqlparser_postgres.rs @@ -1441,6 +1441,11 @@ fn parse_quoted_identifier() { pg_and_generic().verified_stmt(r#"SELECT "quoted "" ident""#); } +#[test] +fn parse_quoted_identifier_2() { + pg_and_generic().verified_stmt(r#"SELECT """quoted ident""""#); +} + #[test] fn parse_local_and_global() { pg_and_generic().verified_stmt("CREATE LOCAL TEMPORARY TABLE table (COL INT)"); From aa46e930c5ed1125820538d3e75492de4b35ee3d Mon Sep 17 00:00:00 2001 From: sivchari Date: Fri, 27 May 2022 19:27:51 +0900 Subject: [PATCH 31/33] Support `UNNEST` as a table factor (#493) * support unnest * add test * fix ast * Update condition for BigQueryDialect or GenericDialect I updated condition. This changes conditionalize parsing for only BigQueryDialect or GenericDialect. * Add some tests * fix test --- src/ast/query.rs | 27 ++++++++++++++ src/parser.rs | 26 ++++++++++++- tests/sqlparser_common.rs | 78 +++++++++++++++++++++++++++++++++++++++ 3 files changed, 130 insertions(+), 1 deletion(-) diff --git a/src/ast/query.rs b/src/ast/query.rs index 07295f44f..6212469f4 100644 --- a/src/ast/query.rs +++ b/src/ast/query.rs @@ -355,6 +355,19 @@ pub enum TableFactor { expr: Expr, alias: Option, }, + /// SELECT * FROM UNNEST ([10,20,30]) as numbers WITH OFFSET; + /// +---------+--------+ + /// | numbers | offset | + /// +---------+--------+ + /// | 10 | 0 | + /// | 20 | 1 | + /// | 30 | 2 | + /// +---------+--------+ + UNNEST { + alias: Option, + array_expr: Box, + with_offset: bool, + }, /// Represents a parenthesized table factor. The SQL spec only allows a /// join expression (`(foo bar [ baz ... ])`) to be nested, /// possibly several times. 
@@ -406,6 +419,20 @@ impl fmt::Display for TableFactor { } Ok(()) } + TableFactor::UNNEST { + alias, + array_expr, + with_offset, + } => { + write!(f, "UNNEST({})", array_expr)?; + if let Some(alias) = alias { + write!(f, " AS {}", alias)?; + } + if *with_offset { + write!(f, " WITH OFFSET")?; + } + Ok(()) + } TableFactor::NestedJoin(table_reference) => write!(f, "({})", table_reference), } } diff --git a/src/parser.rs b/src/parser.rs index 23fd79abb..36c2075c0 100644 --- a/src/parser.rs +++ b/src/parser.rs @@ -3242,6 +3242,7 @@ impl<'a> Parser<'a> { } else { vec![] }; + let mut lateral_views = vec![]; loop { if self.parse_keywords(&[Keyword::LATERAL, Keyword::VIEW]) { @@ -3490,7 +3491,6 @@ impl<'a> Parser<'a> { pub fn parse_table_and_joins(&mut self) -> Result { let relation = self.parse_table_factor()?; - // Note that for keywords to be properly handled here, they need to be // added to `RESERVED_FOR_TABLE_ALIAS`, otherwise they may be parsed as // a table alias. @@ -3635,6 +3635,7 @@ impl<'a> Parser<'a> { match &mut table_and_joins.relation { TableFactor::Derived { alias, .. } | TableFactor::Table { alias, .. } + | TableFactor::UNNEST { alias, .. } | TableFactor::TableFunction { alias, .. } => { // but not `FROM (mytable AS alias1) AS alias2`. if let Some(inner_alias) = alias { @@ -3658,6 +3659,29 @@ impl<'a> Parser<'a> { // appearing alone in parentheses (e.g. 
`FROM (mytable)`) self.expected("joined table", self.peek_token()) } + } else if dialect_of!(self is BigQueryDialect | GenericDialect) + && self.parse_keyword(Keyword::UNNEST) + { + self.expect_token(&Token::LParen)?; + let expr = self.parse_expr()?; + self.expect_token(&Token::RParen)?; + + let alias = match self.parse_optional_table_alias(keywords::RESERVED_FOR_TABLE_ALIAS) { + Ok(Some(alias)) => Some(alias), + Ok(None) => None, + Err(e) => return Err(e), + }; + + let with_offset = match self.expect_keywords(&[Keyword::WITH, Keyword::OFFSET]) { + Ok(()) => true, + Err(_) => false, + }; + + Ok(TableFactor::UNNEST { + alias, + array_expr: Box::new(expr), + with_offset, + }) } else { let name = self.parse_object_name()?; // Postgres, MSSQL: table-valued functions: diff --git a/tests/sqlparser_common.rs b/tests/sqlparser_common.rs index e931cd96f..83dacb046 100644 --- a/tests/sqlparser_common.rs +++ b/tests/sqlparser_common.rs @@ -2786,6 +2786,84 @@ fn parse_table_function() { ); } +#[test] +fn parse_unnest() { + fn chk(alias: bool, with_offset: bool, dialects: &TestedDialects, want: Vec) { + let sql = &format!( + "SELECT * FROM UNNEST(expr){}{}", + if alias { " AS numbers" } else { "" }, + if with_offset { " WITH OFFSET" } else { "" }, + ); + let select = dialects.verified_only_select(sql); + assert_eq!(select.from, want); + } + let dialects = TestedDialects { + dialects: vec![Box::new(BigQueryDialect {}), Box::new(GenericDialect {})], + }; + // 1. both Alias and WITH OFFSET clauses. + chk( + true, + true, + &dialects, + vec![TableWithJoins { + relation: TableFactor::UNNEST { + alias: Some(TableAlias { + name: Ident::new("numbers"), + columns: vec![], + }), + array_expr: Box::new(Expr::Identifier(Ident::new("expr"))), + with_offset: true, + }, + joins: vec![], + }], + ); + // 2. neither Alias nor WITH OFFSET clause. 
+ chk( + false, + false, + &dialects, + vec![TableWithJoins { + relation: TableFactor::UNNEST { + alias: None, + array_expr: Box::new(Expr::Identifier(Ident::new("expr"))), + with_offset: false, + }, + joins: vec![], + }], + ); + // 3. WITH OFFSET but no Alias. + chk( + false, + true, + &dialects, + vec![TableWithJoins { + relation: TableFactor::UNNEST { + alias: None, + array_expr: Box::new(Expr::Identifier(Ident::new("expr"))), + with_offset: true, + }, + joins: vec![], + }], + ); + // 4. Alias but no WITH OFFSET clause. + chk( + true, + false, + &dialects, + vec![TableWithJoins { + relation: TableFactor::UNNEST { + alias: Some(TableAlias { + name: Ident::new("numbers"), + columns: vec![], + }), + array_expr: Box::new(Expr::Identifier(Ident::new("expr"))), + with_offset: false, + }, + joins: vec![], + }], + ); +} + #[test] fn parse_delimited_identifiers() { // check that quoted identifiers in any position remain quoted after serialization From d19d955d9b91a330d8da701e599cbc9e824b14ea Mon Sep 17 00:00:00 2001 From: Yoshiyuki Komazaki Date: Sun, 5 Jun 2022 03:19:57 +0900 Subject: [PATCH 32/33] Support `DATETIME` keyword (#512) --- src/ast/data_type.rs | 3 +++ src/keywords.rs | 1 + src/parser.rs | 1 + tests/sqlparser_common.rs | 13 +++++++++++++ 4 files changed, 18 insertions(+) diff --git a/src/ast/data_type.rs b/src/ast/data_type.rs index 684a5ef57..dc434a8f0 100644 --- a/src/ast/data_type.rs +++ b/src/ast/data_type.rs @@ -71,6 +71,8 @@ pub enum DataType { Date, /// Time Time, + /// Datetime + Datetime, /// Timestamp Timestamp, /// Interval @@ -143,6 +145,7 @@ impl fmt::Display for DataType { DataType::Boolean => write!(f, "BOOLEAN"), DataType::Date => write!(f, "DATE"), DataType::Time => write!(f, "TIME"), + DataType::Datetime => write!(f, "DATETIME"), DataType::Timestamp => write!(f, "TIMESTAMP"), DataType::Interval => write!(f, "INTERVAL"), DataType::Regclass => write!(f, "REGCLASS"), diff --git a/src/keywords.rs b/src/keywords.rs index d739aecd3..0024c1839 
100644 --- a/src/keywords.rs +++ b/src/keywords.rs @@ -169,6 +169,7 @@ define_keywords!( DATA, DATABASE, DATE, + DATETIME, DAY, DEALLOCATE, DEC, diff --git a/src/parser.rs b/src/parser.rs index 36c2075c0..02e81c128 100644 --- a/src/parser.rs +++ b/src/parser.rs @@ -2733,6 +2733,7 @@ impl<'a> Parser<'a> { } Keyword::UUID => Ok(DataType::Uuid), Keyword::DATE => Ok(DataType::Date), + Keyword::DATETIME => Ok(DataType::Datetime), Keyword::TIMESTAMP => { // TBD: we throw away "with/without timezone" information if self.parse_keyword(Keyword::WITH) || self.parse_keyword(Keyword::WITHOUT) { diff --git a/tests/sqlparser_common.rs b/tests/sqlparser_common.rs index 83dacb046..bc715a096 100644 --- a/tests/sqlparser_common.rs +++ b/tests/sqlparser_common.rs @@ -2615,6 +2615,19 @@ fn parse_literal_time() { ); } +#[test] +fn parse_literal_datetime() { + let sql = "SELECT DATETIME '1999-01-01 01:23:34.45'"; + let select = verified_only_select(sql); + assert_eq!( + &Expr::TypedString { + data_type: DataType::Datetime, + value: "1999-01-01 01:23:34.45".into() + }, + expr_from_projection(only(&select.projection)), + ); +} + #[test] fn parse_literal_timestamp() { let sql = "SELECT TIMESTAMP '1999-01-01 01:23:34'"; From 66a3082cb6400222142337793b1182b377b0e62d Mon Sep 17 00:00:00 2001 From: Dmitry Patsura Date: Sat, 4 Jun 2022 21:24:36 +0300 Subject: [PATCH 33/33] feat: Support FETCH (cursors) (#510) --- src/ast/mod.rs | 89 +++++++++++++++++++++++++++++++++++++ src/keywords.rs | 5 +++ src/parser.rs | 62 ++++++++++++++++++++++++++ tests/sqlparser_postgres.rs | 24 ++++++++++ 4 files changed, 180 insertions(+) diff --git a/src/ast/mod.rs b/src/ast/mod.rs index e31701470..a2cdf8981 100644 --- a/src/ast/mod.rs +++ b/src/ast/mod.rs @@ -895,6 +895,17 @@ pub enum Statement { /// deleted along with the dropped table purge: bool, }, + /// FETCH - retrieve rows from a query using a cursor + /// + /// Note: this is a PostgreSQL-specific statement, + /// but may also be compatible with other SQL.
+ Fetch { + /// Cursor name + name: Ident, + direction: FetchDirection, + /// Optional, It's possible to fetch rows from cursor to the table + into: Option, + }, + /// DISCARD [ ALL | PLANS | SEQUENCES | TEMPORARY | TEMP ] + /// + /// Note: this is a PostgreSQL-specific statement, @@ -1114,6 +1125,21 @@ impl fmt::Display for Statement { write!(f, "{}", statement) } Statement::Query(s) => write!(f, "{}", s), + Statement::Fetch { + name, + direction, + into, + } => { + write!(f, "FETCH {} ", direction)?; + + write!(f, "IN {}", name)?; + + if let Some(into) = into { + write!(f, " INTO {}", into)?; + } + + Ok(()) + } Statement::Directory { overwrite, local, @@ -1859,6 +1885,69 @@ impl fmt::Display for Privileges { } } +/// Specific direction for FETCH statement +#[derive(Debug, Clone, PartialEq, Eq, Hash)] +#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] +pub enum FetchDirection { + Count { limit: Value }, + Next, + Prior, + First, + Last, + Absolute { limit: Value }, + Relative { limit: Value }, + All, + // FORWARD + // FORWARD count + Forward { limit: Option }, + ForwardAll, + // BACKWARD + // BACKWARD count + Backward { limit: Option }, + BackwardAll, +} + +impl fmt::Display for FetchDirection { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + match self { + FetchDirection::Count { limit } => f.write_str(&limit.to_string())?, + FetchDirection::Next => f.write_str("NEXT")?, + FetchDirection::Prior => f.write_str("PRIOR")?, + FetchDirection::First => f.write_str("FIRST")?, + FetchDirection::Last => f.write_str("LAST")?, + FetchDirection::Absolute { limit } => { + f.write_str("ABSOLUTE ")?; + f.write_str(&limit.to_string())?; + } + FetchDirection::Relative { limit } => { + f.write_str("RELATIVE ")?; + f.write_str(&limit.to_string())?; + } + FetchDirection::All => f.write_str("ALL")?, + FetchDirection::Forward { limit } => { + f.write_str("FORWARD")?; + + if let Some(l) = limit { + f.write_str(" ")?; + f.write_str(&l.to_string())?; + } + }
FetchDirection::ForwardAll => f.write_str("FORWARD ALL")?, + FetchDirection::Backward { limit } => { + f.write_str("BACKWARD")?; + + if let Some(l) = limit { + f.write_str(" ")?; + f.write_str(&l.to_string())?; + } + } + FetchDirection::BackwardAll => f.write_str("BACKWARD ALL")?, + }; + + Ok(()) + } +} + /// A privilege on a database object (table, sequence, etc.). #[derive(Debug, Clone, PartialEq, Eq, Hash)] #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] diff --git a/src/keywords.rs b/src/keywords.rs index 0024c1839..99cb83155 100644 --- a/src/keywords.rs +++ b/src/keywords.rs @@ -67,6 +67,7 @@ macro_rules! define_keywords { define_keywords!( ABORT, ABS, + ABSOLUTE, ACTION, ADD, ALL, @@ -93,6 +94,7 @@ define_keywords!( AUTO_INCREMENT, AVG, AVRO, + BACKWARD, BEGIN, BEGIN_FRAME, BEGIN_PARTITION, @@ -239,6 +241,7 @@ define_keywords!( FORCE_QUOTE, FOREIGN, FORMAT, + FORWARD, FRAME_ROW, FREE, FREEZE, @@ -387,6 +390,7 @@ define_keywords!( PREPARE, PRESERVE, PRIMARY, + PRIOR, PRIVILEGES, PROCEDURE, PROGRAM, @@ -415,6 +419,7 @@ define_keywords!( REGR_SXX, REGR_SXY, REGR_SYY, + RELATIVE, RELEASE, RENAME, REPAIR, diff --git a/src/parser.rs b/src/parser.rs index 02e81c128..383248b16 100644 --- a/src/parser.rs +++ b/src/parser.rs @@ -167,6 +167,7 @@ impl<'a> Parser<'a> { Keyword::CREATE => Ok(self.parse_create()?), Keyword::DROP => Ok(self.parse_drop()?), Keyword::DISCARD => Ok(self.parse_discard()?), + Keyword::FETCH => Ok(self.parse_fetch_statement()?), Keyword::DELETE => Ok(self.parse_delete()?), Keyword::INSERT => Ok(self.parse_insert()?), Keyword::UPDATE => Ok(self.parse_update()?), @@ -1824,6 +1825,67 @@ impl<'a> Parser<'a> { }) } + // FETCH [ direction { FROM | IN } ] cursor INTO target; + pub fn parse_fetch_statement(&mut self) -> Result { + let direction = if self.parse_keyword(Keyword::NEXT) { + FetchDirection::Next + } else if self.parse_keyword(Keyword::PRIOR) { + FetchDirection::Prior + } else if self.parse_keyword(Keyword::FIRST) { + 
FetchDirection::First + } else if self.parse_keyword(Keyword::LAST) { + FetchDirection::Last + } else if self.parse_keyword(Keyword::ABSOLUTE) { + FetchDirection::Absolute { + limit: self.parse_number_value()?, + } + } else if self.parse_keyword(Keyword::RELATIVE) { + FetchDirection::Relative { + limit: self.parse_number_value()?, + } + } else if self.parse_keyword(Keyword::FORWARD) { + if self.parse_keyword(Keyword::ALL) { + FetchDirection::ForwardAll + } else { + FetchDirection::Forward { + // TODO: Support optional + limit: Some(self.parse_number_value()?), + } + } + } else if self.parse_keyword(Keyword::BACKWARD) { + if self.parse_keyword(Keyword::ALL) { + FetchDirection::BackwardAll + } else { + FetchDirection::Backward { + // TODO: Support optional + limit: Some(self.parse_number_value()?), + } + } + } else if self.parse_keyword(Keyword::ALL) { + FetchDirection::All + } else { + FetchDirection::Count { + limit: self.parse_number_value()?, + } + }; + + self.expect_one_of_keywords(&[Keyword::FROM, Keyword::IN])?; + + let name = self.parse_identifier()?; + + let into = if self.parse_keyword(Keyword::INTO) { + Some(self.parse_object_name()?) 
+ } else { + None + }; + + Ok(Statement::Fetch { + name, + direction, + into, + }) + } + pub fn parse_discard(&mut self) -> Result { let object_type = if self.parse_keyword(Keyword::ALL) { DiscardObject::ALL diff --git a/tests/sqlparser_postgres.rs b/tests/sqlparser_postgres.rs index 676ac6a8a..84d3b2088 100644 --- a/tests/sqlparser_postgres.rs +++ b/tests/sqlparser_postgres.rs @@ -1513,3 +1513,27 @@ fn parse_escaped_literal_string() { "sql parser error: Unterminated encoded string literal at Line: 1, Column 8" ); } + +#[test] +fn parse_fetch() { + pg_and_generic().verified_stmt("FETCH 2048 IN \"SQL_CUR0x7fa44801bc00\""); + pg_and_generic().verified_stmt("FETCH 2048 IN \"SQL_CUR0x7fa44801bc00\" INTO \"new_table\""); + pg_and_generic().verified_stmt("FETCH NEXT IN \"SQL_CUR0x7fa44801bc00\" INTO \"new_table\""); + pg_and_generic().verified_stmt("FETCH PRIOR IN \"SQL_CUR0x7fa44801bc00\" INTO \"new_table\""); + pg_and_generic().verified_stmt("FETCH FIRST IN \"SQL_CUR0x7fa44801bc00\" INTO \"new_table\""); + pg_and_generic().verified_stmt("FETCH LAST IN \"SQL_CUR0x7fa44801bc00\" INTO \"new_table\""); + pg_and_generic() + .verified_stmt("FETCH ABSOLUTE 2048 IN \"SQL_CUR0x7fa44801bc00\" INTO \"new_table\""); + pg_and_generic() + .verified_stmt("FETCH RELATIVE 2048 IN \"SQL_CUR0x7fa44801bc00\" INTO \"new_table\""); + pg_and_generic().verified_stmt("FETCH ALL IN \"SQL_CUR0x7fa44801bc00\" INTO \"new_table\""); + pg_and_generic().verified_stmt("FETCH ALL IN \"SQL_CUR0x7fa44801bc00\" INTO \"new_table\""); + pg_and_generic() + .verified_stmt("FETCH FORWARD 2048 IN \"SQL_CUR0x7fa44801bc00\" INTO \"new_table\""); + pg_and_generic() + .verified_stmt("FETCH FORWARD ALL IN \"SQL_CUR0x7fa44801bc00\" INTO \"new_table\""); + pg_and_generic() + .verified_stmt("FETCH BACKWARD 2048 IN \"SQL_CUR0x7fa44801bc00\" INTO \"new_table\""); + pg_and_generic() + .verified_stmt("FETCH BACKWARD ALL IN \"SQL_CUR0x7fa44801bc00\" INTO \"new_table\""); +}