From 68e8f7194685d55567d65636c6a0ba28a1185eed Mon Sep 17 00:00:00 2001 From: Sarath Francis Date: Fri, 12 Jun 2026 06:53:05 -0400 Subject: [PATCH] fix: keep spaces around - operator in dialects with dashed identifiers In BigQuery (and any dialect allowing dashes inside identifiers) the denseOperators option turned "a - b" into "a-b", which then re-parses as a single dashed identifier - corrupting the query. Skip densing the "-" operator for those dialects so the output round-trips. --- src/dialect.ts | 10 ++++++---- src/formatter/ExpressionFormatter.ts | 11 ++++++++++- test/bigquery.test.ts | 18 +++++++++++++++++- test/features/operators.ts | 11 ++++++++++- 4 files changed, 43 insertions(+), 7 deletions(-) diff --git a/src/dialect.ts b/src/dialect.ts index a4ad930170..5a4b40ec77 100644 --- a/src/dialect.ts +++ b/src/dialect.ts @@ -34,15 +34,17 @@ export const createDialect = (options: DialectOptions): Dialect => { const dialectFromOptions = (dialectOptions: DialectOptions): Dialect => ({ tokenizer: new Tokenizer(dialectOptions.tokenizerOptions, dialectOptions.name), - formatOptions: processDialectFormatOptions(dialectOptions.formatOptions), + formatOptions: processDialectFormatOptions(dialectOptions), }); -const processDialectFormatOptions = ( - options: DialectFormatOptions -): ProcessedDialectFormatOptions => ({ +const processDialectFormatOptions = ({ + tokenizerOptions, + formatOptions: options, +}: DialectOptions): ProcessedDialectFormatOptions => ({ alwaysDenseOperators: options.alwaysDenseOperators || [], onelineClauses: Object.fromEntries(options.onelineClauses.map(name => [name, true])), tabularOnelineClauses: Object.fromEntries( (options.tabularOnelineClauses ?? options.onelineClauses).map(name => [name, true]) ), + identifierDashes: Boolean(tokenizerOptions.identChars?.dashes), }); diff --git a/src/formatter/ExpressionFormatter.ts b/src/formatter/ExpressionFormatter.ts index fa5271de8e..485eed9c35 100644 --- a/src/formatter/ExpressionFormatter.ts +++ b/src/formatter/ExpressionFormatter.ts @@ -60,6 +60,10 @@ export interface ProcessedDialectFormatOptions { alwaysDenseOperators: string[]; onelineClauses: Record; tabularOnelineClauses: Record; + // True when the dialect allows dashes inside identifiers (e.g. BigQuery). + // In such dialects the "-" operator must keep its surrounding spaces, + // otherwise "a - b" densed to "a-b" would re-parse as a single identifier. + identifierDashes: boolean; } /** Formats a generic SQL expression */ @@ -330,7 +334,12 @@ export default class ExpressionFormatter { } private formatOperator({ text }: OperatorNode) { - if (this.cfg.denseOperators || this.dialectCfg.alwaysDenseOperators.includes(text)) { + // In dialects that allow dashes inside identifiers (e.g. BigQuery) the "-" + // operator must keep its surrounding spaces. Densing "a - b" into "a-b" + // would otherwise re-parse as a single dashed identifier. + if (text === '-' && this.dialectCfg.identifierDashes) { + this.layout.add(text, WS.SPACE); + } else if (this.cfg.denseOperators || this.dialectCfg.alwaysDenseOperators.includes(text)) { this.layout.add(WS.NO_SPACE, text); } else if (text === ':') { this.layout.add(WS.NO_SPACE, text, WS.SPACE); diff --git a/test/bigquery.test.ts b/test/bigquery.test.ts index 596cd88470..f44fd91b20 100644 --- a/test/bigquery.test.ts +++ b/test/bigquery.test.ts @@ -58,7 +58,10 @@ describe('BigQueryFormatter', () => { 'EXCEPT DISTINCT', 'INTERSECT DISTINCT', ]); - supportsOperators(format, ['&', '|', '^', '~', '>>', '<<', '||', '=>'], { any: true }); + supportsOperators(format, ['&', '|', '^', '~', '>>', '<<', '||', '=>'], { + any: true, + identifierDashes: true, + }); supportsIsDistinctFrom(format); supportsParams(format, { positional: true, named: ['@'], quoted: ['@``'] }); supportsWindow(format); @@ -80,6 +83,19 @@ describe('BigQueryFormatter', () => { `); }); + // Because dashes are allowed inside identifiers, densing the "-" operator + // would glue its operands into a single identifier ("a - b" -> "a-b"), + // changing the meaning of the query. So denseOperators must keep it spaced. + it('keeps spaces around the - operator in dense mode', () => { + expect(format('SELECT a - b, x - foo(y)\nFROM t', { denseOperators: true })).toBe(dedent` + SELECT + a - b, + x - foo (y) + FROM + t + `); + }); + it('supports @@variables', () => { expect(format('SELECT @@error.message, @@time_zone')).toBe(dedent` SELECT diff --git a/test/features/operators.ts b/test/features/operators.ts index 9c814180df..cc7b79e89e 100644 --- a/test/features/operators.ts +++ b/test/features/operators.ts @@ -5,6 +5,9 @@ import { FormatFn } from '../../src/sqlFormatter.js'; type OperatorsConfig = { logicalOperators?: string[]; any?: boolean; + // True for dialects that allow dashes inside identifiers (e.g. BigQuery), + // where the "-" operator must keep its surrounding spaces even in dense mode. + identifierDashes?: boolean; }; export default function supportsOperators( @@ -27,7 +30,13 @@ export default function supportsOperators( operators.forEach(op => { it(`supports ${op} operator in dense mode`, () => { - expect(format(`foo ${op} bar`, { denseOperators: true })).toBe(`foo${op}bar`); + // In dialects with dashed identifiers, "foo-bar" would re-parse as a + // single identifier, so the "-" operator keeps its surrounding spaces. + if (op === '-' && cfg.identifierDashes) { + expect(format(`foo ${op} bar`, { denseOperators: true })).toBe(`foo ${op} bar`); + } else { + expect(format(`foo ${op} bar`, { denseOperators: true })).toBe(`foo${op}bar`); + } }); });