Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
40 changes: 40 additions & 0 deletions src/ast/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -844,6 +844,43 @@ impl fmt::Display for CaseWhen {
}
}

/// The mode keyword that precedes the operand of `XMLPARSE`.
///
/// Rendered through [`fmt::Display`] as the upper-case SQL keyword.
#[derive(Debug, Clone, Copy, PartialEq, PartialOrd, Eq, Ord, Hash)]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
#[cfg_attr(feature = "visitor", derive(Visit, VisitMut))]
pub enum XmlParseMode {
    /// The `CONTENT` keyword.
    Content,
    /// The `DOCUMENT` keyword.
    Document,
}

impl fmt::Display for XmlParseMode {
    /// Writes the mode as its upper-case SQL keyword.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        // Pick the keyword first, then emit it verbatim (no padding applied).
        let keyword = match self {
            Self::Content => "CONTENT",
            Self::Document => "DOCUMENT",
        };
        f.write_str(keyword)
    }
}

/// An XML parse expression: `XMLPARSE(CONTENT|DOCUMENT expr)`.
///
/// Rendered through [`fmt::Display`] as `XMLPARSE(<mode> <expr>)`.
#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
#[cfg_attr(feature = "visitor", derive(Visit, VisitMut))]
pub struct XmlParseExpr {
    /// The `CONTENT` or `DOCUMENT` keyword given before the operand.
    pub mode: XmlParseMode,
    /// The operand expression that is parsed as XML.
    pub expr: Box<Expr>,
}

impl fmt::Display for XmlParseExpr {
    /// Writes `XMLPARSE(`, the mode, a single space, the operand, and `)`.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let Self { mode, expr } = self;
        write!(f, "XMLPARSE({mode} {expr})")
    }
}

/// An SQL expression of any type.
///
/// # Semantics / Type Checking
Expand Down Expand Up @@ -1233,6 +1270,8 @@ pub enum Expr {
/// This can represent ANSI SQL `DATE`, `TIME`, and `TIMESTAMP` literals (such as `DATE '2020-01-01'`),
/// as well as constants of other types (a non-standard PostgreSQL extension).
TypedString(TypedString),
/// XML parse expression: `XMLPARSE(CONTENT|DOCUMENT expr)`.
XmlParse(XmlParseExpr),
/// Scalar function call e.g. `LEFT(foo, 5)`
Function(Function),
/// `CASE [<operand>] WHEN <condition> THEN <result> ... [ELSE <result>] END`
Expand Down Expand Up @@ -2015,6 +2054,7 @@ impl fmt::Display for Expr {
Expr::Value(v) => write!(f, "{v}"),
Expr::Prefixed { prefix, value } => write!(f, "{prefix} {value}"),
Expr::TypedString(ts) => ts.fmt(f),
Expr::XmlParse(xml_parse) => xml_parse.fmt(f),
Expr::Function(fun) => fun.fmt(f),
Expr::Case {
case_token: _,
Expand Down
1 change: 1 addition & 0 deletions src/ast/spans.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1555,6 +1555,7 @@ impl Spanned for Expr {
Expr::Nested(expr) => expr.span(),
Expr::Value(value) => value.span(),
Expr::TypedString(TypedString { value, .. }) => value.span(),
Expr::XmlParse(xml_parse) => xml_parse.expr.span(),
Expr::Function(function) => function.span(),
Expr::GroupingSets(vec) => {
union_spans(vec.iter().flat_map(|i| i.iter().map(|k| k.span())))
Expand Down
35 changes: 35 additions & 0 deletions src/parser/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2527,8 +2527,43 @@ impl<'a> Parser<'a> {
})
}

/// If the next token is an unquoted word equal to `expected` (ASCII
/// case-insensitive), consume it and return `true`; otherwise leave the
/// token stream untouched and return `false`.
fn parse_unquoted_word_value(&mut self, expected: &str) -> bool {
    // Decide on the peeked token first so the borrow ends before advancing.
    let is_match = match &self.peek_token_ref().token {
        Token::Word(w) => w.quote_style.is_none() && w.value.eq_ignore_ascii_case(expected),
        _ => false,
    };
    if is_match {
        self.next_token();
    }
    is_match
}

/// Parse the mode keyword of `XMLPARSE`.
///
/// Accepts an unquoted `CONTENT` or `DOCUMENT` word (tried in that order,
/// matched case-insensitively). If neither is next, returns the result of
/// `expected_ref("CONTENT or DOCUMENT", ..)` for the next token.
fn parse_xml_parse_mode(&mut self) -> Result<XmlParseMode, ParserError> {
    if self.parse_unquoted_word_value("content") {
        return Ok(XmlParseMode::Content);
    }
    if self.parse_unquoted_word_value("document") {
        return Ok(XmlParseMode::Document);
    }
    self.expected_ref("CONTENT or DOCUMENT", self.peek_token_ref())
}

/// Parse the parenthesized part of `XMLPARSE(CONTENT|DOCUMENT expr)`.
///
/// Expects, in order: `(`, the mode keyword, one expression, and `)`.
/// On success returns the pieces wrapped in [`Expr::XmlParse`].
fn parse_xmlparse_expr(&mut self) -> Result<Expr, ParserError> {
    self.expect_token(&Token::LParen)?;
    let mode = self.parse_xml_parse_mode()?;
    let operand = self.parse_expr()?;
    self.expect_token(&Token::RParen)?;

    let node = XmlParseExpr {
        mode,
        expr: Box::new(operand),
    };
    Ok(Expr::XmlParse(node))
}

/// Parse a function call expression named by `name` and return it as an `Expr`.
///
/// When the dialect supports XML expressions and `name` is the simple,
/// unquoted name `xmlparse`, the call is parsed with the dedicated
/// `XMLPARSE` syntax instead of as a regular function call.
pub fn parse_function(&mut self, name: ObjectName) -> Result<Expr, ParserError> {
    let is_xmlparse = self.dialect.supports_xml_expressions()
        && Self::is_simple_unquoted_object_name(&name, "xmlparse");

    if is_xmlparse {
        self.parse_xmlparse_expr()
    } else {
        self.parse_function_call(name).map(Expr::Function)
    }
}

Expand Down
40 changes: 40 additions & 0 deletions tests/sqlparser_common.rs
Original file line number Diff line number Diff line change
Expand Up @@ -19014,6 +19014,46 @@ fn parse_aliased_function_args() {
.is_err());
}

#[test]
fn parse_xmlparse() {
    let dialects = all_dialects_where(|d| d.supports_xml_expressions());

    // Each case: lowercase input, its canonical rendering, and the mode the
    // resulting `XmlParse` node must carry.
    let cases = [
        (
            "SELECT xmlparse(content '<a/>')",
            "SELECT XMLPARSE(CONTENT '<a/>')",
            XmlParseMode::Content,
        ),
        (
            "SELECT xmlparse(document '<a/>')",
            "SELECT XMLPARSE(DOCUMENT '<a/>')",
            XmlParseMode::Document,
        ),
    ];
    for (sql, canonical, expected_mode) in cases {
        let select = dialects.verified_only_select_with_canonical(sql, canonical);
        match &select.projection[0] {
            UnnamedExpr(Expr::XmlParse(XmlParseExpr { mode, .. })) => {
                assert_eq!(*mode, expected_mode);
            }
            item => panic!("expected XmlParse expression, got {item:?}"),
        }
    }

    // XMLPARSE requires a CONTENT or DOCUMENT mode.
    let missing_mode = dialects.parse_sql_statements("SELECT xmlparse('<a/>')");
    assert!(missing_mode.is_err());

    // On dialects without XML support, `xmlparse` stays a regular function
    // and the special `CONTENT <expr>` syntax is rejected.
    let others = all_dialects_except(|d| d.supports_xml_expressions());
    others.verified_only_select("SELECT xmlparse(1)");
    let special_syntax = others.parse_sql_statements("SELECT xmlparse(content '<a/>')");
    assert!(special_syntax.is_err());
}

/// Regression test for the 2^N parse-time blowup in `parse_compound_expr` on
/// inputs like `IF a0.a1...aN.#`. The parse is run on a worker thread and the
/// main thread asserts that it reports back within a generous timeout. Post-fix
Expand Down
41 changes: 41 additions & 0 deletions tests/sqlparser_postgres.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3951,6 +3951,47 @@ fn parse_xmlforest_aliased_arguments() {
);
}

#[test]
fn parse_xmlparse() {
    // Round-trip statements for both modes. The string operands include
    // well-formed, malformed, and edge-case XML; only SQL parsing is checked,
    // the XML itself is never validated.
    let content_cases = [
        "SELECT XMLPARSE(CONTENT '')",
        "SELECT XMLPARSE(CONTENT ' ')",
        "SELECT XMLPARSE(CONTENT 'abc')",
        "SELECT XMLPARSE(CONTENT '<abc>x</abc>')",
        "SELECT XMLPARSE(CONTENT '<invalidentity>&</invalidentity>')",
        "SELECT XMLPARSE(CONTENT '<undefinedentity>&idontexist;</undefinedentity>')",
        "SELECT XMLPARSE(CONTENT '<twoerrors>&idontexist;</unbalanced>')",
        "SELECT XMLPARSE(CONTENT '<nosuchprefix:tag/>')",
    ];
    let document_cases = [
        "SELECT XMLPARSE(DOCUMENT ' ')",
        "SELECT XMLPARSE(DOCUMENT 'abc')",
        "SELECT XMLPARSE(DOCUMENT '<abc>x</abc>')",
        "SELECT XMLPARSE(DOCUMENT '<invalidentity>&</abc>')",
        "SELECT XMLPARSE(DOCUMENT '<undefinedentity>&idontexist;</abc>')",
        "SELECT XMLPARSE(DOCUMENT '<twoerrors>&idontexist;</unbalanced>')",
        "SELECT XMLPARSE(DOCUMENT '<nosuchprefix:tag/>')",
    ];
    for sql in content_cases.iter().chain(document_cases.iter()).copied() {
        pg().verified_stmt(sql);
    }

    // Lowercase keywords canonicalize to uppercase, and the parsed node
    // carries the mode plus the string literal operand.
    let expected = Expr::XmlParse(XmlParseExpr {
        mode: XmlParseMode::Content,
        expr: Box::new(Expr::Value(
            Value::SingleQuotedString("<a/>".to_string()).into(),
        )),
    });
    let select = pg().verified_only_select_with_canonical(
        "SELECT xmlparse(content '<a/>')",
        "SELECT XMLPARSE(CONTENT '<a/>')",
    );
    assert_eq!(expr_from_projection(&select.projection[0]), &expected);
}

#[test]
fn parse_xml_typed_string() {
// xml '...' should parse as a TypedString on PostgreSQL and Generic
Expand Down
Loading