From 1b72761c428bec0889cf6318adaf39176a3afe23 Mon Sep 17 00:00:00 2001 From: Daniel Frankcom Date: Wed, 17 Jun 2026 16:04:18 -0700 Subject: [PATCH 1/2] Add a mongot search sidecar to the replica-set target Signed-off-by: Daniel Frankcom --- dev/compose.yaml | 49 ++++++++++++++++++- dev/mongot.yml | 30 ++++++++++++ documentdb_tests/framework/engine_registry.py | 37 +++++++++++++- documentdb_tests/framework/preconditions.py | 6 +++ 4 files changed, 119 insertions(+), 3 deletions(-) create mode 100644 dev/mongot.yml diff --git a/dev/compose.yaml b/dev/compose.yaml index 4258cecfb..b9ee7622e 100644 --- a/dev/compose.yaml +++ b/dev/compose.yaml @@ -27,7 +27,7 @@ # query: # # A service with no `x-test-target` is not a test target and is ignored by the -# registry. +# registry (e.g. the mongot sidecar, which is reached only through its mongod). # # Memory: each mongod caps its WiredTiger cache (--wiredTigerCacheSizeGB). By # default a mongod sizes its cache to ~50% of the host/VM RAM; with several @@ -60,7 +60,26 @@ services: mongo-replset: image: mongo:8.2.4 profiles: ["mongo-replset", "all"] - command: ["--replSet", "rs0", "--bind_ip_all", "--wiredTigerCacheSizeGB", "1.5"] + command: + - "--replSet" + - "rs0" + - "--bind_ip_all" + - "--wiredTigerCacheSizeGB" + - "1.5" + # Point at the mongot search sidecar so this replica set also serves the + # search surfaces. mongot is transparent to all other behavior, so the + # set behaves identically to a plain replica set apart from gaining + # search; it is one target, not two. 
+ - "--setParameter" + - "mongotHost=mongot:27028" + - "--setParameter" + - "searchIndexManagementHostAndPort=mongot:27028" + - "--setParameter" + - "useGrpcForSearch=true" + - "--setParameter" + - "skipAuthenticationToMongot=true" + - "--setParameter" + - "skipAuthenticationToSearchIndexManagementServer=true" ports: - "27018:27017" healthcheck: @@ -71,3 +90,29 @@ services: x-test-target: engine: mongodb query: directConnection=true + + # mongot: the search sidecar for the mongo-replset target. Not a test target + # on its own; the suite reaches it only through mongo-replset. mongot is + # MongoDB Search Community Edition (SSPL, same license as the server). It + # replicates from the replica set as an authenticated sync source and reads + # its password from a file, so the entrypoint writes that file (a fixed + # local-dev secret, matched by the searchCoordinator user the harness creates + # on the replica set) with owner-only permissions before launching. It retries + # the connection until that user exists. + mongot: + image: mongodb/mongodb-community-search:latest + profiles: ["mongo-replset", "all"] + entrypoint: + - "sh" + - "-c" + - > + umask 077 && + mkdir -p /mongot-secrets && + printf '%s' "$$MONGOT_SYNC_PASSWORD" > /mongot-secrets/passwordFile && + exec /mongot-community/mongot --config /mongot-config/mongot.yml + environment: + # Fixed local-dev secret shared with the searchCoordinator user the + # harness provisions on mongo-replset. Not a real credential. + MONGOT_SYNC_PASSWORD: "searchSyncPassword" + volumes: + - ./mongot.yml:/mongot-config/mongot.yml:ro diff --git a/dev/mongot.yml b/dev/mongot.yml new file mode 100644 index 000000000..9860f54a2 --- /dev/null +++ b/dev/mongot.yml @@ -0,0 +1,30 @@ +# mongot configuration for the mongo-replset target (dev/compose.yaml service +# "mongot"). mongot is MongoDB Search Community Edition (SSPL), the same license +# as the server. 
It runs alongside the replica set's mongod and serves the +# search and vector search surfaces. +# +# mongot replicates from the mongod replica set as a sync source. It requires an +# authenticated connection (it has no unauthenticated mode), so it logs in as a +# dedicated user holding the searchCoordinator role. The test harness creates +# that user; the mongot entrypoint writes its password file (dev/compose.yaml). +syncSource: + replicaSet: + hostAndPort: "mongo-replset:27017" + username: "searchSyncUser" + passwordFile: "/mongot-secrets/passwordFile" + authSource: "admin" + tls: false +storage: + dataPath: "/var/lib/mongot" +server: + grpc: + # mongod reaches mongot here (see mongotHost / searchIndexManagementHostAndPort + # on the mongo-replset service). Bound on all interfaces so the mongod + # container can connect over the compose network. + address: "0.0.0.0:27028" + tls: + mode: "disabled" +healthCheck: + address: "0.0.0.0:8080" +logging: + verbosity: INFO diff --git a/documentdb_tests/framework/engine_registry.py b/documentdb_tests/framework/engine_registry.py index 774ab7810..1e2bf1664 100644 --- a/documentdb_tests/framework/engine_registry.py +++ b/documentdb_tests/framework/engine_registry.py @@ -102,6 +102,15 @@ def _is_reachable(connection_string: str) -> bool: # replSetInitiate error code when the set is already initiated (e.g. a race # between concurrent callers); treated as success. _ALREADY_INITIALIZED = 23 +# createUser error code when the user already exists (idempotent re-runs). +_USER_ALREADY_EXISTS = 51003 + +# The user mongot authenticates as to replicate from a search-enabled mongod. +# Its name and password are a fixed local-dev secret matched by the mongot +# sidecar's config (see dev/mongot.yml and the mongot service in +# dev/compose.yaml); it is not a real credential. 
+_SEARCH_SYNC_USER = "searchSyncUser" +_SEARCH_SYNC_PASSWORD = "searchSyncPassword" def ensure_initiated(connection_string: str, timeout_s: float = 30.0) -> None: @@ -120,12 +129,14 @@ def ensure_initiated(connection_string: str, timeout_s: float = 30.0) -> None: that already initiated it (AlreadyInitialized) is tolerated. After initiating, it waits up to ``timeout_s`` for a primary to be elected - so callers can write immediately. + so callers can write immediately. A search-enabled mongod additionally has + the searchCoordinator user mongot needs provisioned once it is primary. """ client: MongoClient = MongoClient(connection_string, serverSelectionTimeoutMS=5000) try: try: client.admin.command("replSetGetStatus") + _ensure_search_user(client) # Idempotent; a no-op off a search target. return # Already initiated. except OperationFailure as exc: if exc.code != _NOT_YET_INITIALIZED: @@ -140,6 +151,7 @@ def ensure_initiated(connection_string: str, timeout_s: float = 30.0) -> None: deadline = time.monotonic() + timeout_s while time.monotonic() < deadline: if client.admin.command("hello").get("isWritablePrimary"): + _ensure_search_user(client) return time.sleep(0.5) raise TimeoutError( @@ -149,6 +161,29 @@ def ensure_initiated(connection_string: str, timeout_s: float = 30.0) -> None: client.close() +def _ensure_search_user(client: MongoClient) -> None: + """Provision the searchCoordinator user a search-enabled mongod needs. + + A search target points at a mongot sidecar (a non-empty ``mongotHost``). + mongot replicates from this mongod as an authenticated sync source, so it + needs a user with the searchCoordinator role to log in as. This creates that + user (idempotently) once the server is primary. It is a no-op on a target + without a mongot sidecar. + """ + if not client.admin.command({"getParameter": 1, "mongotHost": 1}).get("mongotHost"): + return # Not a search target. 
+ try: + client.admin.command( + "createUser", + _SEARCH_SYNC_USER, + pwd=_SEARCH_SYNC_PASSWORD, + roles=[{"role": "searchCoordinator", "db": "admin"}], + ) + except OperationFailure as exc: + if exc.code != _USER_ALREADY_EXISTS: + raise + + def live_targets(compose_path: Path = COMPOSE_PATH) -> list[Target]: """Return the declared targets that are currently reachable.""" return [t for t in load_targets(compose_path) if _is_reachable(t.connection_string)] diff --git a/documentdb_tests/framework/preconditions.py b/documentdb_tests/framework/preconditions.py index c090c95b4..eac418a3a 100644 --- a/documentdb_tests/framework/preconditions.py +++ b/documentdb_tests/framework/preconditions.py @@ -56,11 +56,16 @@ "unforced_compact": "compact succeeds without force", "reindex": "reIndex is permitted", "local_rename": "renaming into the unreplicated local database is permitted", + "search": "search and vector search surfaces are available", } # The capabilities each (engine, topology) target has. To add an engine or # topology, add an entry here; every test then gates correctly. _CAPABILITIES_BY_PROFILE: dict[tuple[str, str], frozenset[str]] = { + # A replica set, wired to a mongot search sidecar so it also serves the + # search surfaces (see dev/compose.yaml). mongot is transparent to all other + # behavior, so this is a replica set that additionally has the search + # capability, not a distinct topology. 
("mongodb", "replica_set"): frozenset( { "change_streams", @@ -70,6 +75,7 @@ "cluster_time", "cluster_read_concern", "quorum_write_concern", + "search", } ), ("mongodb", "standalone"): frozenset( From 6504ef4f6f664df7428239a8a22328937d4277da Mon Sep 17 00:00:00 2001 From: Daniel Frankcom Date: Fri, 19 Jun 2026 12:19:34 -0700 Subject: [PATCH 2/2] Add searchMeta stage tests Signed-off-by: Daniel Frankcom --- .../operator/stages/searchMeta/__init__.py | 0 .../test_searchMeta_collection_states.py | 213 +++++++++ .../searchMeta/test_searchMeta_facets.py | 410 ++++++++++++++++++ .../test_searchMeta_facets_advanced.py | 247 +++++++++++ .../searchMeta/test_searchMeta_index.py | 263 +++++++++++ .../searchMeta/test_searchMeta_metadata.py | 366 ++++++++++++++++ .../test_searchMeta_spec_count_errors.py | 358 +++++++++++++++ ...t_searchMeta_spec_facet_boundary_errors.py | 211 +++++++++ .../test_searchMeta_spec_facet_errors.py | 301 +++++++++++++ .../test_searchMeta_spec_operator_errors.py | 234 ++++++++++ .../test_searchMeta_string_facet.py | 162 +++++++ .../test_searchMeta_threshold_bounds.py | 113 +++++ .../searchMeta/test_smoke_searchMeta.py | 9 +- .../stages/searchMeta/utils/__init__.py | 0 .../searchMeta/utils/searchMeta_common.py | 110 +++++ .../stages/test_stages_position_searchMeta.py | 300 +++++++++++++ documentdb_tests/framework/error_codes.py | 1 + documentdb_tests/framework/property_checks.py | 17 + 18 files changed, 3308 insertions(+), 7 deletions(-) create mode 100644 documentdb_tests/compatibility/tests/core/operator/stages/searchMeta/__init__.py create mode 100644 documentdb_tests/compatibility/tests/core/operator/stages/searchMeta/test_searchMeta_collection_states.py create mode 100644 documentdb_tests/compatibility/tests/core/operator/stages/searchMeta/test_searchMeta_facets.py create mode 100644 documentdb_tests/compatibility/tests/core/operator/stages/searchMeta/test_searchMeta_facets_advanced.py create mode 100644 
documentdb_tests/compatibility/tests/core/operator/stages/searchMeta/test_searchMeta_index.py create mode 100644 documentdb_tests/compatibility/tests/core/operator/stages/searchMeta/test_searchMeta_metadata.py create mode 100644 documentdb_tests/compatibility/tests/core/operator/stages/searchMeta/test_searchMeta_spec_count_errors.py create mode 100644 documentdb_tests/compatibility/tests/core/operator/stages/searchMeta/test_searchMeta_spec_facet_boundary_errors.py create mode 100644 documentdb_tests/compatibility/tests/core/operator/stages/searchMeta/test_searchMeta_spec_facet_errors.py create mode 100644 documentdb_tests/compatibility/tests/core/operator/stages/searchMeta/test_searchMeta_spec_operator_errors.py create mode 100644 documentdb_tests/compatibility/tests/core/operator/stages/searchMeta/test_searchMeta_string_facet.py create mode 100644 documentdb_tests/compatibility/tests/core/operator/stages/searchMeta/test_searchMeta_threshold_bounds.py create mode 100644 documentdb_tests/compatibility/tests/core/operator/stages/searchMeta/utils/__init__.py create mode 100644 documentdb_tests/compatibility/tests/core/operator/stages/searchMeta/utils/searchMeta_common.py create mode 100644 documentdb_tests/compatibility/tests/core/operator/stages/test_stages_position_searchMeta.py diff --git a/documentdb_tests/compatibility/tests/core/operator/stages/searchMeta/__init__.py b/documentdb_tests/compatibility/tests/core/operator/stages/searchMeta/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/documentdb_tests/compatibility/tests/core/operator/stages/searchMeta/test_searchMeta_collection_states.py b/documentdb_tests/compatibility/tests/core/operator/stages/searchMeta/test_searchMeta_collection_states.py new file mode 100644 index 000000000..6a8975cb0 --- /dev/null +++ b/documentdb_tests/compatibility/tests/core/operator/stages/searchMeta/test_searchMeta_collection_states.py @@ -0,0 +1,213 @@ +"""Tests for $searchMeta on empty, unindexed, and 
nonexistent collections.""" + +from __future__ import annotations + +from collections.abc import Iterator + +import pytest +from pymongo.collection import Collection + +from documentdb_tests.compatibility.tests.core.operator.stages.searchMeta.utils.searchMeta_common import ( # noqa: E501 + SEARCH_DOCS, + CollectionFixtureTestCase, + build_collection, +) +from documentdb_tests.framework.assertions import assertResult +from documentdb_tests.framework.executor import execute_command +from documentdb_tests.framework.parametrize import pytest_params +from documentdb_tests.framework.test_constants import INT64_ZERO + +pytestmark = pytest.mark.requires(search=True) + + +@pytest.fixture(scope="module") +def empty_search_collection(engine_client, worker_id) -> Iterator[Collection]: + """Indexed but empty collection.""" + with build_collection( + engine_client, + worker_id, + f"{__name__}::empty_search_collection", + "searchmeta_empty", + [], + [{"name": "default", "definition": {"mappings": {"dynamic": True}}}], + ) as coll: + yield coll + + +# Property [Empty Collection Count]: on an indexed but empty collection, +# $searchMeta returns a zero count respecting the requested count.type, +# defaulting to lowerBound. 
+SEARCHMETA_EMPTY_COLLECTION_TESTS: list[CollectionFixtureTestCase] = [ + CollectionFixtureTestCase( + "empty_default", + collection_fixture="empty_search_collection", + pipeline=[{"$searchMeta": {"text": {"query": "quick", "path": "title"}}}], + expected=[{"count": {"lowerBound": INT64_ZERO}}], + msg="$searchMeta should default to a lower-bound zero count on an indexed empty " + "collection", + ), + CollectionFixtureTestCase( + "empty_total", + collection_fixture="empty_search_collection", + pipeline=[ + { + "$searchMeta": { + "text": {"query": "quick", "path": "title"}, + "count": {"type": "total"}, + } + } + ], + expected=[{"count": {"total": INT64_ZERO}}], + msg="$searchMeta should return a total-zero count on an indexed empty collection when " + "count.type is total", + ), + CollectionFixtureTestCase( + "empty_lower_bound", + collection_fixture="empty_search_collection", + pipeline=[ + { + "$searchMeta": { + "text": {"query": "quick", "path": "title"}, + "count": {"type": "lowerBound"}, + } + } + ], + expected=[{"count": {"lowerBound": INT64_ZERO}}], + msg="$searchMeta should return a lower-bound-zero count on an indexed empty collection " + "when count.type is lowerBound", + ), +] + + +@pytest.fixture(scope="module") +def no_index_collection(engine_client, worker_id) -> Iterator[Collection]: + """Populated collection with no search index.""" + with build_collection( + engine_client, + worker_id, + f"{__name__}::no_index_collection", + "searchmeta_no_index", + SEARCH_DOCS, + None, + ) as coll: + yield coll + + +# Property [No-Index Behavior]: on a populated collection with no search index, +# $searchMeta succeeds and returns a total-zero count that ignores the requested +# count.type, and a facet collector omits the facet field. 
+SEARCHMETA_NO_INDEX_TESTS: list[CollectionFixtureTestCase] = [ + CollectionFixtureTestCase( + "no_index_default", + collection_fixture="no_index_collection", + pipeline=[{"$searchMeta": {"text": {"query": "quick", "path": "title"}}}], + expected=[{"count": {"total": INT64_ZERO}}], + msg="$searchMeta should return a total-zero count without error on a populated " + "collection that has no search index", + ), + CollectionFixtureTestCase( + "no_index_lower_bound", + collection_fixture="no_index_collection", + pipeline=[ + { + "$searchMeta": { + "text": {"query": "quick", "path": "title"}, + "count": {"type": "lowerBound"}, + } + } + ], + expected=[{"count": {"total": INT64_ZERO}}], + msg="$searchMeta should ignore a lowerBound count.type and still key the no-index " + "count as total", + ), + CollectionFixtureTestCase( + "no_index_facet_omits_facet", + collection_fixture="no_index_collection", + pipeline=[ + { + "$searchMeta": { + "facet": { + "facets": {"nf": {"type": "number", "path": "n", "boundaries": [0, 25]}} + } + } + } + ], + expected=[{"count": {"total": INT64_ZERO}}], + msg="$searchMeta should omit the facet field and return a total-zero count for a facet " + "collector on a collection that has no search index", + ), +] + + +@pytest.fixture(scope="module") +def nonexistent_collection(engine_client, worker_id) -> Iterator[Collection]: + """Handle to a collection that is never created.""" + with build_collection( + engine_client, + worker_id, + f"{__name__}::nonexistent_collection", + "searchmeta_nonexistent", + None, + None, + ) as coll: + yield coll + + +# Property [Nonexistent Collection]: on a nonexistent collection, $searchMeta +# returns an empty result with no metadata document, for both an operator and a +# facet collector. 
+SEARCHMETA_NONEXISTENT_COLLECTION_TESTS: list[CollectionFixtureTestCase] = [ + CollectionFixtureTestCase( + "nonexistent_operator", + collection_fixture="nonexistent_collection", + pipeline=[{"$searchMeta": {"text": {"query": "quick", "path": "title"}}}], + expected=[], + msg="$searchMeta should return an empty result with no metadata document for an " + "operator on a nonexistent collection", + ), + CollectionFixtureTestCase( + "nonexistent_facet", + collection_fixture="nonexistent_collection", + pipeline=[ + { + "$searchMeta": { + "facet": { + "facets": {"nf": {"type": "number", "path": "n", "boundaries": [0, 25]}} + } + } + } + ], + expected=[], + msg="$searchMeta should return an empty result with no metadata document for a facet " + "collector on a nonexistent collection", + ), +] + +# All collection-state cases share one execution path; the state is carried as +# data via the fixture each case names. +SEARCHMETA_COLLECTION_STATE_TESTS: list[CollectionFixtureTestCase] = ( + SEARCHMETA_EMPTY_COLLECTION_TESTS + + SEARCHMETA_NO_INDEX_TESTS + + SEARCHMETA_NONEXISTENT_COLLECTION_TESTS +) + + +@pytest.mark.aggregate +@pytest.mark.parametrize("test_case", pytest_params(SEARCHMETA_COLLECTION_STATE_TESTS)) +def test_searchMeta_collection_state(engine_client, request, test_case: CollectionFixtureTestCase): + """Test $searchMeta across empty, no-index, and nonexistent collection states.""" + collection = request.getfixturevalue(test_case.collection_fixture) + result = execute_command( + collection, + { + "aggregate": collection.name, + "pipeline": test_case.pipeline, + "cursor": {}, + }, + ) + assertResult( + result, + expected=test_case.expected, + error_code=test_case.error_code, + msg=test_case.msg, + ) diff --git a/documentdb_tests/compatibility/tests/core/operator/stages/searchMeta/test_searchMeta_facets.py b/documentdb_tests/compatibility/tests/core/operator/stages/searchMeta/test_searchMeta_facets.py new file mode 100644 index 000000000..4fc4df34d --- /dev/null 
+++ b/documentdb_tests/compatibility/tests/core/operator/stages/searchMeta/test_searchMeta_facets.py @@ -0,0 +1,410 @@ +"""Tests for $searchMeta number- and date-facet bucketing result behavior.""" + +from __future__ import annotations + +from collections.abc import Iterator +from datetime import datetime, timezone + +import pytest +from bson import Int64 +from pymongo.collection import Collection + +from documentdb_tests.compatibility.tests.core.operator.stages.searchMeta.utils.searchMeta_common import ( # noqa: E501 + CollectionFixtureTestCase, + build_collection, + open_search_collection, +) +from documentdb_tests.framework.assertions import assertResult +from documentdb_tests.framework.executor import execute_command +from documentdb_tests.framework.parametrize import pytest_params +from documentdb_tests.framework.test_constants import INT64_ZERO + +pytestmark = pytest.mark.requires(search=True) + + +@pytest.fixture(scope="module") +def search_collection(engine_client, worker_id) -> Iterator[Collection]: + """Module-scoped metadata search collection (default + alt_idx indexes).""" + with open_search_collection(engine_client, worker_id, f"{__name__}::search_collection") as coll: + yield coll + + +# Dates spanning two query buckets: ids 1-3 fall in the first interval and ids +# 4-5 in the second, so the asserted bucket counts (3 and 2) are observable. +_DATE_FACET_DOCS = [ + {"_id": 1, "d": datetime(2024, 1, 15, tzinfo=timezone.utc)}, + {"_id": 2, "d": datetime(2024, 2, 15, tzinfo=timezone.utc)}, + {"_id": 3, "d": datetime(2024, 3, 15, tzinfo=timezone.utc)}, + {"_id": 4, "d": datetime(2024, 6, 15, tzinfo=timezone.utc)}, + {"_id": 5, "d": datetime(2024, 9, 15, tzinfo=timezone.utc)}, +] + + +@pytest.fixture(scope="module") +def date_facet_collection(engine_client, worker_id) -> Iterator[Collection]: + """Indexed collection with an explicit date mapping for date faceting. 
+ + The faceted field carries an explicit ``date`` mapping so the index types it + as a date rather than relying on dynamic mapping. + """ + with build_collection( + engine_client, + worker_id, + f"{__name__}::date_facet_collection", + "searchmeta_date_facet", + _DATE_FACET_DOCS, + [ + { + "name": "default", + "definition": {"mappings": {"dynamic": True, "fields": {"d": [{"type": "date"}]}}}, + } + ], + ) as coll: + yield coll + + +# Property [Facet Collector Envelope]: a facet collector returns a combined +# count sub-document and per-name buckets array; the count defaults to lowerBound +# and the embedded operator is optional (omitting it matches all documents). +SEARCHMETA_FACET_ENVELOPE_TESTS: list[CollectionFixtureTestCase] = [ + CollectionFixtureTestCase( + "facet_envelope_with_operator", + collection_fixture="search_collection", + pipeline=[ + { + "$searchMeta": { + "facet": { + "operator": {"text": {"query": "quick", "path": "title"}}, + "facets": { + "nf": {"type": "number", "path": "n", "boundaries": [0, 5, 10, 25]} + }, + } + } + } + ], + expected=[ + { + "count": {"lowerBound": Int64(3)}, + "facet": { + "nf": { + "buckets": [ + {"_id": 0, "count": Int64(1)}, + {"_id": 5, "count": Int64(1)}, + {"_id": 10, "count": Int64(1)}, + ] + } + }, + } + ], + msg="$searchMeta facet collector should return a combined count and facet buckets " + "envelope for an embedded operator", + ), + CollectionFixtureTestCase( + "facet_envelope_match_all", + collection_fixture="search_collection", + pipeline=[ + { + "$searchMeta": { + "facet": { + "facets": {"nf": {"type": "number", "path": "n", "boundaries": [0, 10, 25]}} + } + } + } + ], + expected=[ + { + "count": {"lowerBound": Int64(5)}, + "facet": { + "nf": { + "buckets": [ + {"_id": 0, "count": Int64(2)}, + {"_id": 10, "count": Int64(3)}, + ] + } + }, + } + ], + msg="$searchMeta facet collector should match all documents and default to a " + "lower-bound count when the operator is omitted", + ), +] + +# Property [Facet 
Number Bucket Boundaries]: each number-facet bucket _id is the +# lower boundary of its range, and float boundaries are preserved as double _id +# values. +SEARCHMETA_FACET_BOUNDARY_TESTS: list[CollectionFixtureTestCase] = [ + CollectionFixtureTestCase( + "facet_number_float_boundaries", + collection_fixture="search_collection", + pipeline=[ + { + "$searchMeta": { + "facet": { + "facets": { + "nf": { + "type": "number", + "path": "n", + "boundaries": [0.5, 10.5, 25.5], + } + } + } + } + } + ], + expected=[ + { + "count": {"lowerBound": Int64(5)}, + "facet": { + "nf": { + "buckets": [ + {"_id": 0.5, "count": Int64(3)}, + {"_id": 10.5, "count": Int64(2)}, + ] + } + }, + } + ], + msg="$searchMeta number facet should preserve float boundaries as double bucket _ids", + ), +] + +# Property [Facet Default Overflow Bucket]: a default overflow bucket is emitted +# only when default is set, is always emitted when set (including with zero +# overflow), and its string _id never collides with a numeric boundary _id. 
+SEARCHMETA_FACET_DEFAULT_TESTS: list[CollectionFixtureTestCase] = [ + CollectionFixtureTestCase( + "facet_default_omitted_drops_overflow", + collection_fixture="search_collection", + pipeline=[ + { + "$searchMeta": { + "facet": { + "facets": {"nf": {"type": "number", "path": "n", "boundaries": [0, 5]}} + } + } + } + ], + expected=[ + { + "count": {"lowerBound": Int64(5)}, + "facet": {"nf": {"buckets": [{"_id": 0, "count": Int64(1)}]}}, + } + ], + msg="$searchMeta number facet should drop out-of-range values when no default is set", + ), + CollectionFixtureTestCase( + "facet_default_emits_overflow", + collection_fixture="search_collection", + pipeline=[ + { + "$searchMeta": { + "facet": { + "facets": { + "nf": { + "type": "number", + "path": "n", + "boundaries": [0, 5, 10], + "default": "over", + } + } + } + } + } + ], + expected=[ + { + "count": {"lowerBound": Int64(5)}, + "facet": { + "nf": { + "buckets": [ + {"_id": 0, "count": Int64(1)}, + {"_id": 5, "count": Int64(1)}, + {"_id": "over", "count": Int64(3)}, + ] + } + }, + } + ], + msg="$searchMeta number facet should collect out-of-range values into the default " + "overflow bucket when default is set", + ), + CollectionFixtureTestCase( + "facet_default_zero_overflow", + collection_fixture="search_collection", + pipeline=[ + { + "$searchMeta": { + "facet": { + "facets": { + "nf": { + "type": "number", + "path": "n", + "boundaries": [0, 100], + "default": "over", + } + } + } + } + } + ], + expected=[ + { + "count": {"lowerBound": Int64(5)}, + "facet": { + "nf": { + "buckets": [ + {"_id": 0, "count": Int64(5)}, + {"_id": "over", "count": INT64_ZERO}, + ] + } + }, + } + ], + msg="$searchMeta number facet should emit the default overflow bucket with a zero " + "count when no values overflow", + ), + CollectionFixtureTestCase( + "facet_default_string_id_no_collision", + collection_fixture="search_collection", + pipeline=[ + { + "$searchMeta": { + "facet": { + "facets": { + "nf": { + "type": "number", + "path": "n", 
+ "boundaries": [0, 5], + "default": "0", + } + } + } + } + } + ], + expected=[ + { + "count": {"lowerBound": Int64(5)}, + "facet": { + "nf": { + "buckets": [ + {"_id": 0, "count": Int64(1)}, + {"_id": "0", "count": Int64(4)}, + ] + } + }, + } + ], + msg="$searchMeta number facet should keep a string default _id distinct from a " + "numeric boundary _id of the same digits", + ), +] + +# Property [Facet Zero-Match Buckets]: a zero-match facet query over an indexed +# collection returns the bucket structure with zero counts. +SEARCHMETA_FACET_ZERO_MATCH_TESTS: list[CollectionFixtureTestCase] = [ + CollectionFixtureTestCase( + "facet_zero_match", + collection_fixture="search_collection", + pipeline=[ + { + "$searchMeta": { + "facet": { + "operator": {"text": {"query": "nonexistentxyz", "path": "title"}}, + "facets": {"nf": {"type": "number", "path": "n", "boundaries": [0, 2, 25]}}, + } + } + } + ], + expected=[ + { + "count": {"lowerBound": INT64_ZERO}, + "facet": { + "nf": { + "buckets": [ + {"_id": 0, "count": INT64_ZERO}, + {"_id": 2, "count": INT64_ZERO}, + ] + } + }, + } + ], + msg="$searchMeta facet collector should return zero-count buckets for a zero-match " + "query", + ), +] + +# Property [Facet Date Bucket Boundaries]: each date-facet bucket _id is its +# range's lower-boundary datetime, and datetimes are bucketed like number facets. 
+SEARCHMETA_DATE_FACET_TESTS: list[CollectionFixtureTestCase] = [ + CollectionFixtureTestCase( + "date_facet_boundaries", + collection_fixture="date_facet_collection", + pipeline=[ + { + "$searchMeta": { + "facet": { + "facets": { + "df": { + "type": "date", + "path": "d", + "boundaries": [ + datetime(2024, 1, 1, tzinfo=timezone.utc), + datetime(2024, 4, 1, tzinfo=timezone.utc), + datetime(2024, 12, 1, tzinfo=timezone.utc), + ], + } + } + } + } + } + ], + expected=[ + { + "count": {"lowerBound": Int64(5)}, + "facet": { + "df": { + "buckets": [ + {"_id": datetime(2024, 1, 1, tzinfo=timezone.utc), "count": Int64(3)}, + {"_id": datetime(2024, 4, 1, tzinfo=timezone.utc), "count": Int64(2)}, + ] + } + }, + } + ], + msg="$searchMeta date facet should bucket datetimes by their lower boundary and echo " + "each bucket _id as the boundary datetime", + ), +] + +# Number facets run against the standard search collection; date faceting needs +# an index with an explicit date mapping, so its case names a different fixture. +# Both share one execution path, with the collection carried as data. 
+SEARCHMETA_FACET_RESULT_TESTS: list[CollectionFixtureTestCase] = ( + SEARCHMETA_FACET_ENVELOPE_TESTS + + SEARCHMETA_FACET_BOUNDARY_TESTS + + SEARCHMETA_FACET_DEFAULT_TESTS + + SEARCHMETA_FACET_ZERO_MATCH_TESTS + + SEARCHMETA_DATE_FACET_TESTS +) + + +@pytest.mark.aggregate +@pytest.mark.parametrize("test_case", pytest_params(SEARCHMETA_FACET_RESULT_TESTS)) +def test_searchMeta_facets(engine_client, request, test_case: CollectionFixtureTestCase): + """Test $searchMeta number- and date-facet bucket result behavior.""" + collection = request.getfixturevalue(test_case.collection_fixture) + result = execute_command( + collection, + { + "aggregate": collection.name, + "pipeline": test_case.pipeline, + "cursor": {}, + }, + ) + assertResult( + result, + expected=test_case.expected, + error_code=test_case.error_code, + msg=test_case.msg, + ) diff --git a/documentdb_tests/compatibility/tests/core/operator/stages/searchMeta/test_searchMeta_facets_advanced.py b/documentdb_tests/compatibility/tests/core/operator/stages/searchMeta/test_searchMeta_facets_advanced.py new file mode 100644 index 000000000..68fcc82d3 --- /dev/null +++ b/documentdb_tests/compatibility/tests/core/operator/stages/searchMeta/test_searchMeta_facets_advanced.py @@ -0,0 +1,247 @@ +"""Tests for $searchMeta facet count modifiers, key echo, and multiple facets.""" + +from __future__ import annotations + +from collections.abc import Iterator + +import pytest +from bson import Int64 +from pymongo.collection import Collection + +from documentdb_tests.compatibility.tests.core.operator.stages.searchMeta.utils.searchMeta_common import ( # noqa: E501 + open_search_collection, +) +from documentdb_tests.compatibility.tests.core.operator.stages.utils.stage_test_case import ( + StageTestCase, +) +from documentdb_tests.framework.assertions import assertResult +from documentdb_tests.framework.executor import execute_command +from documentdb_tests.framework.parametrize import pytest_params + +pytestmark = 
pytest.mark.requires(search=True) + + +@pytest.fixture(scope="module") +def search_collection(engine_client, worker_id) -> Iterator[Collection]: + """Module-scoped metadata search collection (default + alt_idx indexes).""" + with open_search_collection(engine_client, worker_id, f"{__name__}::search_collection") as coll: + yield coll + + +# Property [Facet Stage Count Modifier]: a stage-level count modifier changes +# only the count sub-document flavor and leaves the facet result unchanged. +SEARCHMETA_FACET_COUNT_MODIFIER_TESTS: list[StageTestCase] = [ + StageTestCase( + "facet_count_total", + pipeline=[ + { + "$searchMeta": { + "facet": { + "facets": {"nf": {"type": "number", "path": "n", "boundaries": [0, 25]}} + }, + "count": {"type": "total"}, + } + } + ], + expected=[ + { + "count": {"total": Int64(5)}, + "facet": {"nf": {"buckets": [{"_id": 0, "count": Int64(5)}]}}, + } + ], + msg="$searchMeta should apply a stage-level total count modifier on top of the facet " + "result without changing the buckets", + ), + StageTestCase( + "facet_count_lower_bound", + pipeline=[ + { + "$searchMeta": { + "facet": { + "facets": {"nf": {"type": "number", "path": "n", "boundaries": [0, 25]}} + }, + "count": {"type": "lowerBound"}, + } + } + ], + expected=[ + { + "count": {"lowerBound": Int64(5)}, + "facet": {"nf": {"buckets": [{"_id": 0, "count": Int64(5)}]}}, + } + ], + msg="$searchMeta should apply a stage-level lower-bound count modifier on top of the " + "facet result without changing the buckets", + ), +] + +# Property [Facet Key Echo]: facet map keys are not field-path validated and are +# echoed back verbatim as result keys. 
+SEARCHMETA_FACET_KEY_TESTS: list[StageTestCase] = [ + StageTestCase( + f"facet_key_{suffix}", + pipeline=[ + { + "$searchMeta": { + "facet": { + "facets": {key: {"type": "number", "path": "n", "boundaries": [0, 25]}} + } + } + } + ], + expected=[ + { + "count": {"lowerBound": Int64(5)}, + "facet": {key: {"buckets": [{"_id": 0, "count": Int64(5)}]}}, + } + ], + msg=f"$searchMeta should echo a {suffix} facet key verbatim without field-path " + "validation", + ) + for key, suffix in [ + ("", "empty"), + ("$x", "dollar_prefixed"), + ("a.b", "dotted"), + ] +] + +# Property [Multiple Facets in One Collector]: a collector with multiple named +# facets computes each independently under its own key while sharing one count +# sub-document; a default bucket on one facet does not affect a sibling without +# one. +SEARCHMETA_MULTI_FACET_TESTS: list[StageTestCase] = [ + StageTestCase( + "multi_facet_different_paths", + pipeline=[ + { + "$searchMeta": { + "facet": { + "facets": { + "nf": {"type": "number", "path": "n", "boundaries": [0, 10, 25]}, + "idf": {"type": "number", "path": "_id", "boundaries": [0, 3, 10]}, + } + } + } + } + ], + expected=[ + { + "count": {"lowerBound": Int64(5)}, + "facet": { + "nf": { + "buckets": [ + {"_id": 0, "count": Int64(2)}, + {"_id": 10, "count": Int64(3)}, + ] + }, + "idf": { + "buckets": [ + {"_id": 0, "count": Int64(2)}, + {"_id": 3, "count": Int64(3)}, + ] + }, + }, + } + ], + msg="$searchMeta should compute sibling facets over different paths independently " + "under a single shared count", + ), + StageTestCase( + "multi_facet_same_path_different_boundaries", + pipeline=[ + { + "$searchMeta": { + "facet": { + "facets": { + "nf1": {"type": "number", "path": "n", "boundaries": [0, 10, 25]}, + "nf2": {"type": "number", "path": "n", "boundaries": [0, 5, 25]}, + } + } + } + } + ], + expected=[ + { + "count": {"lowerBound": Int64(5)}, + "facet": { + "nf1": { + "buckets": [ + {"_id": 0, "count": Int64(2)}, + {"_id": 10, "count": Int64(3)}, + ] + 
}, + "nf2": { + "buckets": [ + {"_id": 0, "count": Int64(1)}, + {"_id": 5, "count": Int64(4)}, + ] + }, + }, + } + ], + msg="$searchMeta should compute sibling facets over the same path with different " + "boundaries independently", + ), + StageTestCase( + "multi_facet_independent_default", + pipeline=[ + { + "$searchMeta": { + "facet": { + "facets": { + "withd": { + "type": "number", + "path": "n", + "boundaries": [0, 5], + "default": "over", + }, + "nod": {"type": "number", "path": "n", "boundaries": [0, 5]}, + } + } + } + } + ], + expected=[ + { + "count": {"lowerBound": Int64(5)}, + "facet": { + "withd": { + "buckets": [ + {"_id": 0, "count": Int64(1)}, + {"_id": "over", "count": Int64(4)}, + ] + }, + "nod": {"buckets": [{"_id": 0, "count": Int64(1)}]}, + }, + } + ], + msg="$searchMeta should keep a default overflow bucket independent per facet so a " + "sibling without default drops its overflow", + ), +] + +SEARCHMETA_FACET_ADVANCED_TESTS: list[StageTestCase] = ( + SEARCHMETA_FACET_COUNT_MODIFIER_TESTS + + SEARCHMETA_FACET_KEY_TESTS + + SEARCHMETA_MULTI_FACET_TESTS +) + + +@pytest.mark.aggregate +@pytest.mark.parametrize("test_case", pytest_params(SEARCHMETA_FACET_ADVANCED_TESTS)) +def test_searchMeta_facets_advanced(search_collection, test_case: StageTestCase): + """Test $searchMeta facet count modifiers, key echo, and multiple facets.""" + result = execute_command( + search_collection, + { + "aggregate": search_collection.name, + "pipeline": test_case.pipeline, + "cursor": {}, + }, + ) + assertResult( + result, + expected=test_case.expected, + error_code=test_case.error_code, + msg=test_case.msg, + ) diff --git a/documentdb_tests/compatibility/tests/core/operator/stages/searchMeta/test_searchMeta_index.py b/documentdb_tests/compatibility/tests/core/operator/stages/searchMeta/test_searchMeta_index.py new file mode 100644 index 000000000..48fce590c --- /dev/null +++ 
b/documentdb_tests/compatibility/tests/core/operator/stages/searchMeta/test_searchMeta_index.py @@ -0,0 +1,263 @@ +"""Tests for $searchMeta index selection and query modifier behavior.""" + +from __future__ import annotations + +from collections.abc import Iterator + +import pytest +from bson import Int64 +from pymongo.collection import Collection + +from documentdb_tests.compatibility.tests.core.operator.stages.searchMeta.utils.searchMeta_common import ( # noqa: E501 + open_search_collection, +) +from documentdb_tests.compatibility.tests.core.operator.stages.utils.stage_test_case import ( + StageTestCase, +) +from documentdb_tests.framework.assertions import assertResult +from documentdb_tests.framework.executor import execute_command +from documentdb_tests.framework.parametrize import pytest_params +from documentdb_tests.framework.test_constants import INT64_ZERO + +pytestmark = pytest.mark.requires(search=True) + + +@pytest.fixture(scope="module") +def search_collection(engine_client, worker_id) -> Iterator[Collection]: + """Module-scoped metadata search collection (default + alt_idx indexes).""" + with open_search_collection(engine_client, worker_id, f"{__name__}::search_collection") as coll: + yield coll + + +# Property [Index Selection and Defaulting]: the index field names the search +# index to use, and omitting it or passing null selects the index named +# "default". 
+SEARCHMETA_INDEX_DEFAULTING_TESTS: list[StageTestCase] = [ + StageTestCase( + "index_explicit_default", + pipeline=[ + { + "$searchMeta": { + "text": {"query": "quick", "path": "title"}, + "index": "default", + } + } + ], + expected=[{"count": {"lowerBound": Int64(3)}}], + msg="$searchMeta should select the default index when index is the literal " + '"default", matching the omitted-index result', + ), + StageTestCase( + "index_named_non_default", + pipeline=[ + { + "$searchMeta": { + "text": {"query": "quick", "path": "title"}, + "index": "alt_idx", + } + } + ], + expected=[{"count": {"lowerBound": Int64(3)}}], + msg="$searchMeta should select a non-default index when its name matches", + ), + StageTestCase( + "index_null_default", + pipeline=[ + { + "$searchMeta": { + "text": {"query": "quick", "path": "title"}, + "index": None, + } + } + ], + expected=[{"count": {"lowerBound": Int64(3)}}], + msg="$searchMeta should treat a null index as field-absent and use the default index", + ), +] + +# Property [Index Nonexistent Silent Miss]: a well-formed but nonexistent index +# name returns a total-zero count without error; dollar-prefixed strings are +# taken as literal names, and a facet collector omits the facet field on a miss. 
+SEARCHMETA_INDEX_MISS_TESTS: list[StageTestCase] = [ + StageTestCase( + "index_nonexistent_silent_miss", + pipeline=[ + { + "$searchMeta": { + "text": {"query": "quick", "path": "title"}, + "index": "nope", + } + } + ], + expected=[{"count": {"total": INT64_ZERO}}], + msg="$searchMeta should return a total-zero count without error for a " + "nonexistent index name", + ), + StageTestCase( + "index_dollar_field_literal", + pipeline=[ + { + "$searchMeta": { + "text": {"query": "quick", "path": "title"}, + "index": "$title", + } + } + ], + expected=[{"count": {"total": INT64_ZERO}}], + msg="$searchMeta should treat a dollar-prefixed index as a literal nonexistent " + "name rather than a field path", + ), + StageTestCase( + "index_double_dollar_literal", + pipeline=[ + { + "$searchMeta": { + "text": {"query": "quick", "path": "title"}, + "index": "$$NOW", + } + } + ], + expected=[{"count": {"total": INT64_ZERO}}], + msg="$searchMeta should treat a double-dollar index as a literal nonexistent " + "name rather than a system variable", + ), + StageTestCase( + "index_facet_nonexistent_omits_facet", + pipeline=[ + { + "$searchMeta": { + "facet": { + "facets": {"nf": {"type": "number", "path": "n", "boundaries": [0, 25]}} + }, + "index": "nope", + } + } + ], + expected=[{"count": {"total": INT64_ZERO}}], + msg="$searchMeta should omit the facet field and return a total-zero count when " + "a facet collector targets a nonexistent index", + ), +] + +# Property [Index Name Matching and Character Handling]: index name matching is +# exact, case-sensitive, and not whitespace-trimmed; control characters, null +# bytes, Unicode, and long names are valid strings that miss silently with a +# total-zero count. 
+SEARCHMETA_INDEX_NAME_MATCHING_TESTS: list[StageTestCase] = [ + StageTestCase( + f"index_name_{suffix}", + pipeline=[ + { + "$searchMeta": { + "text": {"query": "quick", "path": "title"}, + "index": name, + } + } + ], + expected=[{"count": {"total": INT64_ZERO}}], + msg=f"$searchMeta should treat a {suffix} index name as a nonexistent name and " + "miss silently", + ) + for name, suffix in [ + ("Default", "capitalized_default"), + ("default ", "trailing_space_default"), + (" default", "leading_space_default"), + ("def ault", "embedded_space_default"), + ("a\x00b", "embedded_null_byte"), + ("\x00", "single_null_byte"), + # Control characters U+0001, U+0002, U+001F. + ("\x01\x02\x1f", "control_characters"), + ("\t", "tab"), + ("caf\u00e9_\u7d22\u5f15_\U0001f50d", "unicode"), + ("x" * 10_000, "long_name"), + ] +] + +# Property [Modifier Coexistence]: the count and index modifiers coexist with a +# search operator in the same spec without being mistaken for operators. +SEARCHMETA_MODIFIER_COEXISTENCE_TESTS: list[StageTestCase] = [ + StageTestCase( + "coexist_operator_count_index", + pipeline=[ + { + "$searchMeta": { + "text": {"query": "quick", "path": "title"}, + "count": {"type": "total"}, + "index": "default", + } + } + ], + expected=[{"count": {"total": Int64(3)}}], + msg="$searchMeta should accept count and index modifiers alongside a search operator " + "without mistaking them for operators", + ), +] + +# Property [Zero-Match Count]: a query matching no documents on an indexed +# collection returns a zero count respecting the requested count.type, defaulting +# to lowerBound. 
+SEARCHMETA_ZERO_MATCH_TESTS: list[StageTestCase] = [ + StageTestCase( + "zero_match_default", + pipeline=[{"$searchMeta": {"text": {"query": "nonexistentxyz", "path": "title"}}}], + expected=[{"count": {"lowerBound": INT64_ZERO}}], + msg="$searchMeta should default to a lower-bound zero count for a zero-match query on " + "an indexed collection", + ), + StageTestCase( + "zero_match_total", + pipeline=[ + { + "$searchMeta": { + "text": {"query": "nonexistentxyz", "path": "title"}, + "count": {"type": "total"}, + } + } + ], + expected=[{"count": {"total": INT64_ZERO}}], + msg="$searchMeta should return a total-zero count for a zero-match query when count.type " + "is total", + ), + StageTestCase( + "zero_match_lower_bound", + pipeline=[ + { + "$searchMeta": { + "text": {"query": "nonexistentxyz", "path": "title"}, + "count": {"type": "lowerBound"}, + } + } + ], + expected=[{"count": {"lowerBound": INT64_ZERO}}], + msg="$searchMeta should return a lower-bound-zero count for a zero-match query when " + "count.type is lowerBound", + ), +] + +SEARCHMETA_INDEX_TESTS: list[StageTestCase] = ( + SEARCHMETA_INDEX_DEFAULTING_TESTS + + SEARCHMETA_INDEX_MISS_TESTS + + SEARCHMETA_INDEX_NAME_MATCHING_TESTS + + SEARCHMETA_MODIFIER_COEXISTENCE_TESTS + + SEARCHMETA_ZERO_MATCH_TESTS +) + + +@pytest.mark.aggregate +@pytest.mark.parametrize("test_case", pytest_params(SEARCHMETA_INDEX_TESTS)) +def test_searchMeta_index(search_collection, test_case: StageTestCase): + """Test $searchMeta index selection and query modifier behavior.""" + result = execute_command( + search_collection, + { + "aggregate": search_collection.name, + "pipeline": test_case.pipeline, + "cursor": {}, + }, + ) + assertResult( + result, + expected=test_case.expected, + error_code=test_case.error_code, + msg=test_case.msg, + ) diff --git a/documentdb_tests/compatibility/tests/core/operator/stages/searchMeta/test_searchMeta_metadata.py 
b/documentdb_tests/compatibility/tests/core/operator/stages/searchMeta/test_searchMeta_metadata.py new file mode 100644 index 000000000..2642902cd --- /dev/null +++ b/documentdb_tests/compatibility/tests/core/operator/stages/searchMeta/test_searchMeta_metadata.py @@ -0,0 +1,366 @@ +"""Tests for $searchMeta metadata and count result semantics.""" + +from __future__ import annotations + +from collections.abc import Iterator +from datetime import datetime, timezone + +import pytest +from bson import ( + Binary, + Code, + Int64, + MaxKey, + MinKey, + ObjectId, + Regex, + Timestamp, +) +from pymongo.collection import Collection + +from documentdb_tests.compatibility.tests.core.operator.stages.searchMeta.utils.searchMeta_common import ( # noqa: E501 + open_search_collection, +) +from documentdb_tests.compatibility.tests.core.operator.stages.utils.stage_test_case import ( + StageTestCase, +) +from documentdb_tests.framework.assertions import assertResult +from documentdb_tests.framework.error_codes import ( + EXPRESSION_NOT_OBJECT_ERROR, + FAILED_TO_PARSE_ERROR, +) +from documentdb_tests.framework.executor import execute_command +from documentdb_tests.framework.parametrize import pytest_params +from documentdb_tests.framework.property_checks import Gte, Lte +from documentdb_tests.framework.test_constants import DECIMAL128_ZERO, INT32_MAX + +pytestmark = pytest.mark.requires(search=True) + + +@pytest.fixture(scope="module") +def search_collection(engine_client, worker_id) -> Iterator[Collection]: + """Module-scoped metadata search collection (default + alt_idx indexes).""" + with open_search_collection(engine_client, worker_id, f"{__name__}::search_collection") as coll: + yield coll + + +# Property [Metadata Result Semantics]: any recognized search operator returns a +# lowerBound count reflecting its match count, never the matching documents. 
+SEARCHMETA_METADATA_TESTS: list[StageTestCase] = [ + StageTestCase( + "metadata_text", + pipeline=[{"$searchMeta": {"text": {"query": "quick", "path": "title"}}}], + expected=[{"count": {"lowerBound": Int64(3)}}], + msg="$searchMeta should return the match count for a text operator as metadata", + ), + StageTestCase( + "metadata_phrase", + pipeline=[{"$searchMeta": {"phrase": {"query": "brown fox", "path": "title"}}}], + expected=[{"count": {"lowerBound": Int64(1)}}], + msg="$searchMeta should return the match count for a phrase operator as metadata", + ), + StageTestCase( + "metadata_wildcard", + pipeline=[ + { + "$searchMeta": { + "wildcard": { + "query": "quick*", + "path": "title", + "allowAnalyzedField": True, + } + } + } + ], + expected=[{"count": {"lowerBound": Int64(3)}}], + msg="$searchMeta should return the match count for a wildcard operator as metadata", + ), + StageTestCase( + "metadata_exists", + pipeline=[{"$searchMeta": {"exists": {"path": "title"}}}], + expected=[{"count": {"lowerBound": Int64(5)}}], + msg="$searchMeta should return the match count for an exists operator as metadata", + ), + StageTestCase( + "metadata_equals", + pipeline=[{"$searchMeta": {"equals": {"path": "n", "value": 5}}}], + expected=[{"count": {"lowerBound": Int64(1)}}], + msg="$searchMeta should return the match count for an equals operator as metadata", + ), + StageTestCase( + "metadata_range", + pipeline=[{"$searchMeta": {"range": {"path": "n", "gte": 5, "lte": 15}}}], + expected=[{"count": {"lowerBound": Int64(3)}}], + msg="$searchMeta should return the match count for a range operator as metadata", + ), + StageTestCase( + "metadata_near", + pipeline=[{"$searchMeta": {"near": {"path": "n", "origin": 10, "pivot": 5}}}], + expected=[{"count": {"lowerBound": Int64(5)}}], + msg="$searchMeta should return the match count for a near operator as metadata", + ), + StageTestCase( + "metadata_compound", + pipeline=[ + {"$searchMeta": {"compound": {"must": [{"text": {"query": 
"quick", "path": "title"}}]}}} + ], + expected=[{"count": {"lowerBound": Int64(3)}}], + msg="$searchMeta should return the match count for a compound operator as metadata", + ), +] + +# Property [Count Result Shape and Defaults]: count.type selects the result +# flavor (an exact {count:{total:n}} for total, a {count:{lowerBound:n}} for +# lowerBound), and an empty, null, or type-less count defaults to a +# lower-bound count. +SEARCHMETA_COUNT_SHAPE_TESTS: list[StageTestCase] = [ + StageTestCase( + "count_type_total", + pipeline=[ + { + "$searchMeta": { + "text": {"query": "quick", "path": "title"}, + "count": {"type": "total"}, + } + } + ], + expected=[{"count": {"total": Int64(3)}}], + msg="$searchMeta count.type total should return an exact total count", + ), + StageTestCase( + "count_type_lower_bound", + pipeline=[ + { + "$searchMeta": { + "text": {"query": "quick", "path": "title"}, + "count": {"type": "lowerBound"}, + } + } + ], + expected=[{"count": {"lowerBound": Int64(3)}}], + msg="$searchMeta count.type lowerBound should return a lower-bound count", + ), + StageTestCase( + "count_default_empty", + pipeline=[ + { + "$searchMeta": { + "text": {"query": "quick", "path": "title"}, + "count": {}, + } + } + ], + expected=[{"count": {"lowerBound": Int64(3)}}], + msg="$searchMeta should default to a lower-bound count when count is an empty document", + ), + StageTestCase( + "count_default_null", + pipeline=[ + { + "$searchMeta": { + "text": {"query": "quick", "path": "title"}, + "count": None, + } + } + ], + expected=[{"count": {"lowerBound": Int64(3)}}], + msg="$searchMeta should default to a lower-bound count when count is null", + ), + StageTestCase( + "count_default_type_null", + pipeline=[ + { + "$searchMeta": { + "text": {"query": "quick", "path": "title"}, + "count": {"type": None}, + } + } + ], + expected=[{"count": {"lowerBound": Int64(3)}}], + msg="$searchMeta should default to a lower-bound count when count.type is null", + ), + StageTestCase( + 
"count_threshold_no_type", + pipeline=[ + { + "$searchMeta": { + "text": {"query": "quick", "path": "title"}, + "count": {"threshold": 10}, + } + } + ], + expected=[{"count": {"lowerBound": Int64(3)}}], + msg="$searchMeta should default to a lower-bound count when a threshold has no type", + ), +] + +# Property [Count Threshold Exactness]: count.type total returns an exact count +# regardless of threshold, and count.type lowerBound returns the exact match +# count whenever the threshold is at least the match count. +SEARCHMETA_THRESHOLD_EXACT_TESTS: list[StageTestCase] = [ + StageTestCase( + "threshold_total_ignores_below_match", + pipeline=[ + { + "$searchMeta": { + "text": {"query": "quick", "path": "title"}, + "count": {"type": "total", "threshold": 1}, + } + } + ], + expected=[{"count": {"total": Int64(3)}}], + msg="$searchMeta count.type total should ignore a threshold below the match count and " + "stay exact", + ), + StageTestCase( + "threshold_lower_bound_exact_when_equal", + pipeline=[ + { + "$searchMeta": { + "text": {"query": "quick", "path": "title"}, + "count": {"type": "lowerBound", "threshold": 3}, + } + } + ], + expected=[{"count": {"lowerBound": Int64(3)}}], + msg="$searchMeta count.type lowerBound should be exact when threshold equals the " + "match count", + ), +] + +# Property [Count Threshold Type Acceptance]: count.threshold accepts any +# non-negative integer-valued number (int32, in-range int64, whole-number +# double), including the boundaries 0 and int32 max, without error. 
+SEARCHMETA_THRESHOLD_TYPE_TESTS: list[StageTestCase] = [ + StageTestCase( + "threshold_type_int32_zero", + pipeline=[ + { + "$searchMeta": { + "text": {"query": "quick", "path": "title"}, + "count": {"type": "lowerBound", "threshold": 0}, + } + } + ], + expected={"count": {"lowerBound": [Gte(0), Lte(3)]}}, + msg="$searchMeta should accept a zero threshold and return a count within the bound", + ), + StageTestCase( + "threshold_type_int32_max", + pipeline=[ + { + "$searchMeta": { + "text": {"query": "quick", "path": "title"}, + "count": {"type": "lowerBound", "threshold": INT32_MAX}, + } + } + ], + expected=[{"count": {"lowerBound": Int64(3)}}], + msg="$searchMeta should accept an int32-max threshold boundary", + ), + StageTestCase( + "threshold_type_int64", + pipeline=[ + { + "$searchMeta": { + "text": {"query": "quick", "path": "title"}, + "count": {"type": "lowerBound", "threshold": Int64(5)}, + } + } + ], + expected=[{"count": {"lowerBound": Int64(3)}}], + msg="$searchMeta should accept an in-range int64 threshold", + ), + StageTestCase( + "threshold_type_double_whole", + pipeline=[ + { + "$searchMeta": { + "text": {"query": "quick", "path": "title"}, + "count": {"type": "lowerBound", "threshold": 5.0}, + } + } + ], + expected=[{"count": {"lowerBound": Int64(3)}}], + msg="$searchMeta should accept a whole-number double threshold", + ), +] + +SEARCHMETA_RESULT_TESTS: list[StageTestCase] = ( + SEARCHMETA_METADATA_TESTS + + SEARCHMETA_COUNT_SHAPE_TESTS + + SEARCHMETA_THRESHOLD_EXACT_TESTS + + SEARCHMETA_THRESHOLD_TYPE_TESTS +) + +# Property [Stage Value Scalar Type Error]: a scalar or null stage value is +# rejected at parse time as not an object. 
+SEARCHMETA_STAGE_VALUE_SCALAR_ERROR_TESTS: list[StageTestCase] = [ + StageTestCase( + f"stage_value_scalar_{tid}", + pipeline=[{"$searchMeta": val}], + error_code=EXPRESSION_NOT_OBJECT_ERROR, + msg=f"$searchMeta should reject a {tid} stage value as a non-object", + ) + for tid, val in [ + ("string", "x"), + ("int32", 42), + ("int64", Int64(1)), + ("double", 3.14), + ("decimal128", DECIMAL128_ZERO), + ("bool", True), + ("objectid", ObjectId("507f1f77bcf86cd799439011")), + ("datetime", datetime(2024, 1, 1, tzinfo=timezone.utc)), + ("timestamp", Timestamp(1, 1)), + ("binary", Binary(b"\x01\x02\x03")), + ("regex", Regex(".*", "i")), + ("code", Code("function(){}")), + ("minkey", MinKey()), + ("maxkey", MaxKey()), + ("null", None), + ] +] + +# Property [Stage Value Array Type Error]: an array stage value, including an +# empty array, is rejected at parse time and distinguished from a scalar. +SEARCHMETA_STAGE_VALUE_ARRAY_ERROR_TESTS: list[StageTestCase] = [ + StageTestCase( + "stage_value_array_empty", + pipeline=[{"$searchMeta": []}], + error_code=FAILED_TO_PARSE_ERROR, + msg="$searchMeta should reject an empty array stage value as a non-object", + ), + StageTestCase( + "stage_value_array_non_empty", + pipeline=[{"$searchMeta": [{"text": {"query": "quick", "path": "title"}}]}], + error_code=FAILED_TO_PARSE_ERROR, + msg="$searchMeta should reject a non-empty array stage value as a non-object", + ), +] + +SEARCHMETA_STAGE_VALUE_ERROR_TESTS: list[StageTestCase] = ( + SEARCHMETA_STAGE_VALUE_SCALAR_ERROR_TESTS + SEARCHMETA_STAGE_VALUE_ARRAY_ERROR_TESTS +) + +# Result/count semantics and stage-value type errors share one execution path. 
+SEARCHMETA_TESTS: list[StageTestCase] = SEARCHMETA_RESULT_TESTS + SEARCHMETA_STAGE_VALUE_ERROR_TESTS + + +@pytest.mark.aggregate +@pytest.mark.parametrize("test_case", pytest_params(SEARCHMETA_TESTS)) +def test_searchMeta_metadata(search_collection, test_case: StageTestCase): + """Test $searchMeta metadata, count result semantics, and stage-value errors.""" + result = execute_command( + search_collection, + { + "aggregate": search_collection.name, + "pipeline": test_case.pipeline, + "cursor": {}, + }, + ) + assertResult( + result, + expected=test_case.expected, + error_code=test_case.error_code, + msg=test_case.msg, + ) diff --git a/documentdb_tests/compatibility/tests/core/operator/stages/searchMeta/test_searchMeta_spec_count_errors.py b/documentdb_tests/compatibility/tests/core/operator/stages/searchMeta/test_searchMeta_spec_count_errors.py new file mode 100644 index 000000000..dbec9cffd --- /dev/null +++ b/documentdb_tests/compatibility/tests/core/operator/stages/searchMeta/test_searchMeta_spec_count_errors.py @@ -0,0 +1,358 @@ +"""Tests for $searchMeta count specification errors.""" + +from __future__ import annotations + +from collections.abc import Iterator +from datetime import datetime, timezone + +import pytest +from bson import ( + Binary, + Code, + Decimal128, + Int64, + MaxKey, + MinKey, + ObjectId, + Regex, + Timestamp, +) +from pymongo.collection import Collection + +from documentdb_tests.compatibility.tests.core.operator.stages.searchMeta.utils.searchMeta_common import ( # noqa: E501 + open_search_collection, +) +from documentdb_tests.compatibility.tests.core.operator.stages.utils.stage_test_case import ( + StageTestCase, +) +from documentdb_tests.framework.assertions import assertResult +from documentdb_tests.framework.error_codes import SEARCH_EXECUTOR_ERROR +from documentdb_tests.framework.executor import execute_command +from documentdb_tests.framework.parametrize import pytest_params +from documentdb_tests.framework.test_constants import ( + 
DECIMAL128_ZERO, + FLOAT_INFINITY, + FLOAT_NAN, + FLOAT_NEGATIVE_INFINITY, + INT32_OVERFLOW, + INT32_UNDERFLOW, +) + +pytestmark = pytest.mark.requires(search=True) + + +@pytest.fixture(scope="module") +def search_collection(engine_client, worker_id) -> Iterator[Collection]: + """Module-scoped metadata search collection (default + alt_idx indexes).""" + with open_search_collection(engine_client, worker_id, f"{__name__}::search_collection") as coll: + yield coll + + +# Property [Count Not A Document]: a present count value that is not a document +# is rejected. +SEARCHMETA_COUNT_TYPE_ERROR_TESTS: list[StageTestCase] = [ + StageTestCase( + f"count_not_document_{tid}", + pipeline=[{"$searchMeta": {"text": {"query": "quick", "path": "title"}, "count": val}}], + error_code=SEARCH_EXECUTOR_ERROR, + msg=f"$searchMeta should reject a {tid} count value as not a document", + ) + for tid, val in [ + ("string", "x"), + ("int32", 42), + ("int64", Int64(1)), + ("double", 3.14), + ("decimal128", DECIMAL128_ZERO), + ("bool", True), + ("array", [1, 2]), + ("objectid", ObjectId("507f1f77bcf86cd799439011")), + ("datetime", datetime(2024, 1, 1, tzinfo=timezone.utc)), + ("timestamp", Timestamp(1, 1)), + ("binary", Binary(b"\x01\x02\x03")), + ("regex", Regex(".*", "i")), + ("code", Code("function(){}")), + ("minkey", MinKey()), + ("maxkey", MaxKey()), + ] +] + +# Property [Count Type Not A String]: a count.type value that is not a string is +# rejected. 
+SEARCHMETA_COUNT_TYPE_NOT_STRING_ERROR_TESTS: list[StageTestCase] = [ + StageTestCase( + f"count_type_not_string_{tid}", + pipeline=[ + { + "$searchMeta": { + "text": {"query": "quick", "path": "title"}, + "count": {"type": val}, + } + } + ], + error_code=SEARCH_EXECUTOR_ERROR, + msg=f"$searchMeta should reject a {tid} count.type value as not a string", + ) + for tid, val in [ + ("int32", 42), + ("int64", Int64(1)), + ("double", 3.14), + ("decimal128", DECIMAL128_ZERO), + ("bool", True), + ("array", [1, 2]), + ("object", {"a": 1}), + ("objectid", ObjectId("507f1f77bcf86cd799439011")), + ("datetime", datetime(2024, 1, 1, tzinfo=timezone.utc)), + ("timestamp", Timestamp(1, 1)), + ("binary", Binary(b"\x01\x02\x03")), + ("regex", Regex(".*", "i")), + ("code", Code("function(){}")), + ("minkey", MinKey()), + ("maxkey", MaxKey()), + ] +] + +# Property [Count Type Unknown String]: a count.type string outside +# {lowerBound, total} is rejected, with matching being case-sensitive and not +# whitespace-trimmed. +SEARCHMETA_COUNT_TYPE_VALUE_ERROR_TESTS: list[StageTestCase] = [ + StageTestCase( + f"count_type_value_{suffix}", + pipeline=[ + { + "$searchMeta": { + "text": {"query": "quick", "path": "title"}, + "count": {"type": value}, + } + } + ], + error_code=SEARCH_EXECUTOR_ERROR, + msg=f"$searchMeta should reject a {suffix} count.type string as an unknown count type", + ) + for value, suffix in [ + ("Total", "capitalized_total"), + ("total ", "trailing_space_total"), + ("foo", "foo"), + ] +] + +# Property [Count Threshold Non-Integer Type]: a count.threshold of a +# non-integer-valued type is rejected, including decimal128 even when its value +# is whole. 
+SEARCHMETA_COUNT_THRESHOLD_TYPE_ERROR_TESTS: list[StageTestCase] = [ + StageTestCase( + f"count_threshold_type_{tid}", + pipeline=[ + { + "$searchMeta": { + "text": {"query": "quick", "path": "title"}, + "count": {"threshold": val}, + } + } + ], + error_code=SEARCH_EXECUTOR_ERROR, + msg=f"$searchMeta should reject a {tid} count.threshold value as not an integer", + ) + for tid, val in [ + ("string", "x"), + ("bool", True), + ("array", [1, 2]), + ("object", {"a": 1}), + ("decimal128_whole", Decimal128("2")), + ("objectid", ObjectId("507f1f77bcf86cd799439011")), + ("datetime", datetime(2024, 1, 1, tzinfo=timezone.utc)), + ("timestamp", Timestamp(1, 1)), + ("binary", Binary(b"\x01\x02\x03")), + ("regex", Regex(".*", "i")), + ("code", Code("function(){}")), + ("minkey", MinKey()), + ("maxkey", MaxKey()), + ] +] + +# Property [Count Threshold Non-Integral Double]: a count.threshold that is a +# fractional double or NaN is rejected. +SEARCHMETA_COUNT_THRESHOLD_FRACTIONAL_ERROR_TESTS: list[StageTestCase] = [ + StageTestCase( + f"count_threshold_fractional_{suffix}", + pipeline=[ + { + "$searchMeta": { + "text": {"query": "quick", "path": "title"}, + "count": {"threshold": val}, + } + } + ], + error_code=SEARCH_EXECUTOR_ERROR, + msg=f"$searchMeta should reject a {suffix} count.threshold double as non-integral", + ) + for val, suffix in [ + (2.5, "fractional"), + (FLOAT_NAN, "nan"), + ] +] + +# Property [Count Threshold Negative]: a negative count.threshold is rejected. 
+SEARCHMETA_COUNT_THRESHOLD_NEGATIVE_ERROR_TESTS: list[StageTestCase] = [ + StageTestCase( + f"count_threshold_negative_{suffix}", + pipeline=[ + { + "$searchMeta": { + "text": {"query": "quick", "path": "title"}, + "count": {"threshold": val}, + } + } + ], + error_code=SEARCH_EXECUTOR_ERROR, + msg=f"$searchMeta should reject a {suffix} count.threshold as negative", + ) + for val, suffix in [ + (-1, "minus_one"), + (-2.0, "negative_double"), + ] +] + +# Property [Count Threshold Overflow]: a count.threshold above int32 max is +# rejected. +SEARCHMETA_COUNT_THRESHOLD_OVERFLOW_ERROR_TESTS: list[StageTestCase] = [ + StageTestCase( + f"count_threshold_overflow_{suffix}", + pipeline=[ + { + "$searchMeta": { + "text": {"query": "quick", "path": "title"}, + "count": {"threshold": val}, + } + } + ], + error_code=SEARCH_EXECUTOR_ERROR, + msg=f"$searchMeta should reject a {suffix} count.threshold above int32 max", + ) + for val, suffix in [ + (INT32_OVERFLOW, "int32_max_plus_one"), + (FLOAT_INFINITY, "infinity"), + ] +] + +# Property [Count Threshold Underflow]: a count.threshold below int32 min is +# rejected. +SEARCHMETA_COUNT_THRESHOLD_UNDERFLOW_ERROR_TESTS: list[StageTestCase] = [ + StageTestCase( + f"count_threshold_underflow_{suffix}", + pipeline=[ + { + "$searchMeta": { + "text": {"query": "quick", "path": "title"}, + "count": {"threshold": val}, + } + } + ], + error_code=SEARCH_EXECUTOR_ERROR, + msg=f"$searchMeta should reject a {suffix} count.threshold below int32 min", + ) + for val, suffix in [ + (INT32_UNDERFLOW, "int32_min_minus_one"), + (FLOAT_NEGATIVE_INFINITY, "negative_infinity"), + ] +] + +# Property [Count Threshold Validated For Total]: count.threshold is validated +# for integrality, sign, and range even when count.type is total, where its +# value is otherwise ignored. 
+SEARCHMETA_COUNT_THRESHOLD_TOTAL_ERROR_TESTS: list[StageTestCase] = [ + StageTestCase( + "count_threshold_total_fractional", + pipeline=[ + { + "$searchMeta": { + "text": {"query": "quick", "path": "title"}, + "count": {"type": "total", "threshold": 2.5}, + } + } + ], + error_code=SEARCH_EXECUTOR_ERROR, + msg="$searchMeta should validate count.threshold integrality even when count.type is " + "total", + ), + StageTestCase( + "count_threshold_total_negative", + pipeline=[ + { + "$searchMeta": { + "text": {"query": "quick", "path": "title"}, + "count": {"type": "total", "threshold": -1}, + } + } + ], + error_code=SEARCH_EXECUTOR_ERROR, + msg="$searchMeta should validate count.threshold sign even when count.type is total", + ), + StageTestCase( + "count_threshold_total_overflow", + pipeline=[ + { + "$searchMeta": { + "text": {"query": "quick", "path": "title"}, + "count": {"type": "total", "threshold": INT32_OVERFLOW}, + } + } + ], + error_code=SEARCH_EXECUTOR_ERROR, + msg="$searchMeta should validate count.threshold range even when count.type is total", + ), +] + +# Property [Count Unrecognized Sub-Field]: an unrecognized sub-field of count is +# rejected, with matching being case-sensitive. 
+SEARCHMETA_COUNT_UNKNOWN_FIELD_ERROR_TESTS: list[StageTestCase] = [ + StageTestCase( + f"count_unknown_field_{suffix}", + pipeline=[ + { + "$searchMeta": { + "text": {"query": "quick", "path": "title"}, + "count": count_value, + } + } + ], + error_code=SEARCH_EXECUTOR_ERROR, + msg=f"$searchMeta should reject an unrecognized count sub-field {name!r}", + ) + for count_value, name, suffix in [ + ({"type": "total", "foo": 1}, "foo", "alongside_type"), + ({"bar": 1}, "bar", "solo"), + ({"Type": "total"}, "Type", "capitalized_type"), + ] +] + +SEARCHMETA_SPEC_COUNT_ERROR_TESTS: list[StageTestCase] = ( + SEARCHMETA_COUNT_TYPE_ERROR_TESTS + + SEARCHMETA_COUNT_TYPE_NOT_STRING_ERROR_TESTS + + SEARCHMETA_COUNT_TYPE_VALUE_ERROR_TESTS + + SEARCHMETA_COUNT_THRESHOLD_TYPE_ERROR_TESTS + + SEARCHMETA_COUNT_THRESHOLD_FRACTIONAL_ERROR_TESTS + + SEARCHMETA_COUNT_THRESHOLD_NEGATIVE_ERROR_TESTS + + SEARCHMETA_COUNT_THRESHOLD_OVERFLOW_ERROR_TESTS + + SEARCHMETA_COUNT_THRESHOLD_UNDERFLOW_ERROR_TESTS + + SEARCHMETA_COUNT_THRESHOLD_TOTAL_ERROR_TESTS + + SEARCHMETA_COUNT_UNKNOWN_FIELD_ERROR_TESTS +) + + +@pytest.mark.aggregate +@pytest.mark.parametrize("test_case", pytest_params(SEARCHMETA_SPEC_COUNT_ERROR_TESTS)) +def test_searchMeta_spec_count_errors(search_collection, test_case: StageTestCase): + """Test $searchMeta count specification errors.""" + result = execute_command( + search_collection, + { + "aggregate": search_collection.name, + "pipeline": test_case.pipeline, + "cursor": {}, + }, + ) + assertResult( + result, + error_code=test_case.error_code, + msg=test_case.msg, + ) diff --git a/documentdb_tests/compatibility/tests/core/operator/stages/searchMeta/test_searchMeta_spec_facet_boundary_errors.py b/documentdb_tests/compatibility/tests/core/operator/stages/searchMeta/test_searchMeta_spec_facet_boundary_errors.py new file mode 100644 index 000000000..8b9eec54d --- /dev/null +++ 
b/documentdb_tests/compatibility/tests/core/operator/stages/searchMeta/test_searchMeta_spec_facet_boundary_errors.py @@ -0,0 +1,211 @@ +"""Tests for $searchMeta facet boundary, numBuckets, and token-mapping errors.""" + +from __future__ import annotations + +from collections.abc import Iterator + +import pytest +from pymongo.collection import Collection + +from documentdb_tests.compatibility.tests.core.operator.stages.searchMeta.utils.searchMeta_common import ( # noqa: E501 + open_search_collection, +) +from documentdb_tests.compatibility.tests.core.operator.stages.utils.stage_test_case import ( + StageTestCase, +) +from documentdb_tests.framework.assertions import assertResult +from documentdb_tests.framework.error_codes import SEARCH_EXECUTOR_ERROR +from documentdb_tests.framework.executor import execute_command +from documentdb_tests.framework.parametrize import pytest_params + +pytestmark = pytest.mark.requires(search=True) + + +@pytest.fixture(scope="module") +def search_collection(engine_client, worker_id) -> Iterator[Collection]: + """Module-scoped metadata search collection (default + alt_idx indexes).""" + with open_search_collection(engine_client, worker_id, f"{__name__}::search_collection") as coll: + yield coll + + +# Property [Facet Number Boundaries Validation]: number-facet boundaries must be +# distinct ascending numeric values; too few, non-ascending, or non-numeric are +# rejected. 
SEARCHMETA_FACET_BOUNDARIES_ERROR_TESTS: list[StageTestCase] = [
    StageTestCase(
        case_id,
        pipeline=[
            {
                "$searchMeta": {
                    "facet": {
                        "facets": {
                            "nf": {"type": "number", "path": "n", "boundaries": boundaries}
                        }
                    }
                }
            }
        ],
        error_code=SEARCH_EXECUTOR_ERROR,
        msg=reason,
    )
    # Each row: test id, the invalid boundaries array, and the assertion message.
    for case_id, boundaries, reason in (
        (
            "boundaries_single_element",
            [0],
            "$searchMeta should reject a number facet with fewer than two boundaries",
        ),
        (
            "boundaries_unsorted",
            [40, 0, 20],
            "$searchMeta should reject non-ascending number facet boundaries",
        ),
        (
            "boundaries_non_numeric",
            ["a", "b"],
            "$searchMeta should reject non-numeric number facet boundaries",
        ),
    )
]

# Property [Facet Date Boundaries Type]: a date facet with numeric (non-datetime)
# boundaries is rejected.
SEARCHMETA_FACET_DATE_BOUNDARIES_ERROR_TESTS: list[StageTestCase] = [
    StageTestCase(
        "date_boundaries_numeric",
        pipeline=[
            {
                "$searchMeta": {
                    "facet": {
                        "facets": {
                            "nf": {
                                "type": "date",
                                "path": "n",
                                "boundaries": [0, 25],
                            }
                        }
                    }
                }
            }
        ],
        error_code=SEARCH_EXECUTOR_ERROR,
        msg="$searchMeta should reject numeric boundaries for a date facet",
    ),
]

# Property [Facet NumBuckets Bounds]: a string-facet numBuckets outside
# [1..1000] is rejected.
SEARCHMETA_FACET_NUMBUCKETS_ERROR_TESTS: list[StageTestCase] = [
    StageTestCase(
        case_id,
        pipeline=[
            {
                "$searchMeta": {
                    "facet": {
                        "facets": {
                            "nf": {"type": "string", "path": "cat", "numBuckets": num_buckets}
                        }
                    }
                }
            }
        ],
        error_code=SEARCH_EXECUTOR_ERROR,
        msg=reason,
    )
    # Each row: test id, the out-of-range numBuckets, and the assertion message.
    for case_id, num_buckets, reason in (
        (
            "numbuckets_zero",
            0,
            "$searchMeta should reject a numBuckets below the lower bound",
        ),
        (
            "numbuckets_above_max",
            1001,
            "$searchMeta should reject a numBuckets above the upper bound",
        ),
    )
]

# Property [Facet String Boundaries Unrecognized]: a string facet rejects the
# boundaries field as unrecognized.
SEARCHMETA_FACET_STRING_BOUNDARIES_ERROR_TESTS: list[StageTestCase] = [
    StageTestCase(
        "string_facet_with_boundaries",
        pipeline=[
            {
                "$searchMeta": {
                    "facet": {
                        "facets": {
                            "nf": {
                                "type": "string",
                                "path": "cat",
                                "boundaries": [0, 25],
                            }
                        }
                    }
                }
            }
        ],
        error_code=SEARCH_EXECUTOR_ERROR,
        msg="$searchMeta should reject boundaries on a string facet as an unrecognized field",
    ),
]

# Property [Facet Number NumBuckets Unrecognized]: a number facet rejects the
# numBuckets field as unrecognized.
SEARCHMETA_FACET_NUMBER_NUMBUCKETS_ERROR_TESTS: list[StageTestCase] = [
    StageTestCase(
        "number_facet_with_numbuckets",
        pipeline=[
            {
                "$searchMeta": {
                    "facet": {
                        "facets": {
                            # Otherwise-valid number facet; numBuckets is the only offender.
                            "nf": {
                                "type": "number",
                                "path": "n",
                                "boundaries": [0, 25],
                                "numBuckets": 1,
                            }
                        }
                    }
                }
            }
        ],
        error_code=SEARCH_EXECUTOR_ERROR,
        msg="$searchMeta should reject numBuckets on a number facet as an unrecognized field",
    ),
]

# Property [Facet Token Mapping]: a string facet on a dynamically-indexed field
# is rejected because dynamic mapping does not token-index string fields.
+SEARCHMETA_FACET_TOKEN_MAPPING_ERROR_TESTS: list[StageTestCase] = [ + StageTestCase( + "string_facet_dynamic_field", + pipeline=[ + {"$searchMeta": {"facet": {"facets": {"nf": {"type": "string", "path": "cat"}}}}} + ], + error_code=SEARCH_EXECUTOR_ERROR, + msg="$searchMeta should reject string faceting on a dynamically-indexed string field", + ), +] + +SEARCHMETA_SPEC_FACET_BOUNDARY_ERROR_TESTS: list[StageTestCase] = ( + SEARCHMETA_FACET_BOUNDARIES_ERROR_TESTS + + SEARCHMETA_FACET_DATE_BOUNDARIES_ERROR_TESTS + + SEARCHMETA_FACET_NUMBUCKETS_ERROR_TESTS + + SEARCHMETA_FACET_STRING_BOUNDARIES_ERROR_TESTS + + SEARCHMETA_FACET_NUMBER_NUMBUCKETS_ERROR_TESTS + + SEARCHMETA_FACET_TOKEN_MAPPING_ERROR_TESTS +) + + +@pytest.mark.aggregate +@pytest.mark.parametrize("test_case", pytest_params(SEARCHMETA_SPEC_FACET_BOUNDARY_ERROR_TESTS)) +def test_searchMeta_spec_facet_boundary_errors(search_collection, test_case: StageTestCase): + """Test $searchMeta facet boundary, numBuckets, and token-mapping errors.""" + result = execute_command( + search_collection, + { + "aggregate": search_collection.name, + "pipeline": test_case.pipeline, + "cursor": {}, + }, + ) + assertResult( + result, + error_code=test_case.error_code, + msg=test_case.msg, + ) diff --git a/documentdb_tests/compatibility/tests/core/operator/stages/searchMeta/test_searchMeta_spec_facet_errors.py b/documentdb_tests/compatibility/tests/core/operator/stages/searchMeta/test_searchMeta_spec_facet_errors.py new file mode 100644 index 000000000..244aeba06 --- /dev/null +++ b/documentdb_tests/compatibility/tests/core/operator/stages/searchMeta/test_searchMeta_spec_facet_errors.py @@ -0,0 +1,301 @@ +"""Tests for $searchMeta facet collector and definition spec errors.""" + +from __future__ import annotations + +from collections.abc import Iterator +from datetime import datetime, timezone + +import pytest +from bson import ( + Binary, + Code, + Int64, + MaxKey, + MinKey, + ObjectId, + Regex, + Timestamp, +) +from 
pymongo.collection import Collection + +from documentdb_tests.compatibility.tests.core.operator.stages.searchMeta.utils.searchMeta_common import ( # noqa: E501 + open_search_collection, +) +from documentdb_tests.compatibility.tests.core.operator.stages.utils.stage_test_case import ( + StageTestCase, +) +from documentdb_tests.framework.assertions import assertResult +from documentdb_tests.framework.error_codes import SEARCH_EXECUTOR_ERROR +from documentdb_tests.framework.executor import execute_command +from documentdb_tests.framework.parametrize import pytest_params +from documentdb_tests.framework.test_constants import DECIMAL128_ZERO + +pytestmark = pytest.mark.requires(search=True) + + +@pytest.fixture(scope="module") +def search_collection(engine_client, worker_id) -> Iterator[Collection]: + """Module-scoped metadata search collection (default + alt_idx indexes).""" + with open_search_collection(engine_client, worker_id, f"{__name__}::search_collection") as coll: + yield coll + + +# Property [Facet Value Not A Document]: a present facet value that is not a +# document is rejected. +SEARCHMETA_FACET_VALUE_TYPE_ERROR_TESTS: list[StageTestCase] = [ + StageTestCase( + f"facet_not_document_{tid}", + pipeline=[{"$searchMeta": {"facet": val}}], + error_code=SEARCH_EXECUTOR_ERROR, + msg=f"$searchMeta should reject a {tid} facet value as not a document", + ) + for tid, val in [ + ("string", "x"), + ("int32", 42), + ("int64", Int64(1)), + ("double", 3.14), + ("decimal128", DECIMAL128_ZERO), + ("bool", True), + ("array", [1, 2]), + ("objectid", ObjectId("507f1f77bcf86cd799439011")), + ("datetime", datetime(2024, 1, 1, tzinfo=timezone.utc)), + ("timestamp", Timestamp(1, 1)), + ("binary", Binary(b"\x01\x02\x03")), + ("regex", Regex(".*", "i")), + ("code", Code("function(){}")), + ("minkey", MinKey()), + ("maxkey", MaxKey()), + ] +] + +# Property [Facet Facets Required]: a facet collector whose facets field is +# omitted or null is rejected. 
SEARCHMETA_FACETS_REQUIRED_ERROR_TESTS: list[StageTestCase] = [
    StageTestCase(
        case_id,
        pipeline=[{"$searchMeta": {"facet": collector}}],
        error_code=SEARCH_EXECUTOR_ERROR,
        msg=reason,
    )
    # Each row: test id, the facet collector document, and the assertion message.
    for case_id, collector, reason in (
        (
            "facets_omitted",
            {},
            "$searchMeta should reject a facet collector with facets omitted",
        ),
        (
            "facets_null",
            {"facets": None},
            "$searchMeta should treat a null facets value as field-absent and require facets",
        ),
    )
]

# Property [Facet Facets Not A Document]: a present facet.facets value that is
# not a document is rejected.
SEARCHMETA_FACETS_TYPE_ERROR_TESTS: list[StageTestCase] = [
    StageTestCase(
        f"facets_not_document_{tid}",
        pipeline=[{"$searchMeta": {"facet": {"facets": non_document}}}],
        error_code=SEARCH_EXECUTOR_ERROR,
        msg=f"$searchMeta should reject a {tid} facet.facets value as not a document",
    )
    # One sample value per non-document BSON type, keyed by the type's test id.
    for tid, non_document in (
        ("string", "x"),
        ("int32", 42),
        ("int64", Int64(1)),
        ("double", 3.14),
        ("decimal128", DECIMAL128_ZERO),
        ("bool", True),
        ("array", [1, 2]),
        ("objectid", ObjectId("507f1f77bcf86cd799439011")),
        ("datetime", datetime(2024, 1, 1, tzinfo=timezone.utc)),
        ("timestamp", Timestamp(1, 1)),
        ("binary", Binary(b"\x01\x02\x03")),
        ("regex", Regex(".*", "i")),
        ("code", Code("function(){}")),
        ("minkey", MinKey()),
        ("maxkey", MaxKey()),
    )
]

# Property [Facet Facets Empty]: an empty facet.facets document is rejected.
SEARCHMETA_FACETS_EMPTY_ERROR_TESTS: list[StageTestCase] = [
    StageTestCase(
        "facets_empty",
        pipeline=[
            {
                "$searchMeta": {
                    "facet": {"facets": {}},
                }
            }
        ],
        error_code=SEARCH_EXECUTOR_ERROR,
        msg="$searchMeta should reject an empty facet.facets document",
    ),
]

# Property [Facet Operator Empty]: an empty facet.operator document is rejected.
SEARCHMETA_FACET_OPERATOR_EMPTY_ERROR_TESTS: list[StageTestCase] = [
    StageTestCase(
        "facet_operator_empty",
        pipeline=[
            {
                "$searchMeta": {
                    "facet": {
                        # The facets document is valid; only the empty operator is at fault.
                        "operator": {},
                        "facets": {
                            "nf": {"type": "number", "path": "n", "boundaries": [0, 25]},
                        },
                    }
                }
            }
        ],
        error_code=SEARCH_EXECUTOR_ERROR,
        msg="$searchMeta should reject an empty facet operator by listing the valid operators",
    ),
]

# Property [Facet Definition Required Fields]: a facet definition missing its
# type or path is rejected.
SEARCHMETA_FACET_DEF_REQUIRED_ERROR_TESTS: list[StageTestCase] = [
    StageTestCase(
        case_id,
        pipeline=[{"$searchMeta": {"facet": {"facets": {"nf": facet_def}}}}],
        error_code=SEARCH_EXECUTOR_ERROR,
        msg=reason,
    )
    # Each row: test id, a facet definition lacking one required field, and the
    # assertion message.
    for case_id, facet_def, reason in (
        (
            "facet_def_missing_type",
            {"path": "n", "boundaries": [0, 25]},
            "$searchMeta should reject a facet definition missing its type",
        ),
        (
            "facet_def_missing_path",
            {"type": "number", "boundaries": [0, 25]},
            "$searchMeta should reject a facet definition missing its path",
        ),
    )
]

# Property [Facet Definition Type Value]: a facet definition type outside
# {date, number, string} is rejected.
SEARCHMETA_FACET_DEF_TYPE_VALUE_ERROR_TESTS: list[StageTestCase] = [
    StageTestCase(
        "facet_def_type_unknown",
        pipeline=[
            {
                "$searchMeta": {
                    "facet": {
                        "facets": {
                            "nf": {
                                "type": "foo",
                                "path": "n",
                                "boundaries": [0, 25],
                            }
                        }
                    }
                }
            }
        ],
        error_code=SEARCH_EXECUTOR_ERROR,
        msg="$searchMeta should reject a facet definition type outside the allowed set",
    ),
]

# Property [Facet Definition Not A Document]: a facet definition value that is
# not a document is rejected.
+SEARCHMETA_FACET_DEF_TYPE_ERROR_TESTS: list[StageTestCase] = [ + StageTestCase( + f"facet_def_not_document_{tid}", + pipeline=[{"$searchMeta": {"facet": {"facets": {"nf": val}}}}], + error_code=SEARCH_EXECUTOR_ERROR, + msg=f"$searchMeta should reject a {tid} facet definition value as not a document", + ) + for tid, val in [ + ("string", "x"), + ("int32", 42), + ("int64", Int64(1)), + ("double", 3.14), + ("decimal128", DECIMAL128_ZERO), + ("bool", True), + ("array", [1, 2]), + ("objectid", ObjectId("507f1f77bcf86cd799439011")), + ("datetime", datetime(2024, 1, 1, tzinfo=timezone.utc)), + ("timestamp", Timestamp(1, 1)), + ("binary", Binary(b"\x01\x02\x03")), + ("regex", Regex(".*", "i")), + ("code", Code("function(){}")), + ("minkey", MinKey()), + ("maxkey", MaxKey()), + ] +] + +# Property [Facet Unrecognized Field]: an unrecognized field at the facet +# definition or collector level, including a count modifier placed inside the +# collector, is rejected. +SEARCHMETA_FACET_UNKNOWN_FIELD_ERROR_TESTS: list[StageTestCase] = [ + StageTestCase( + "facet_def_unknown_field", + pipeline=[ + { + "$searchMeta": { + "facet": { + "facets": { + "nf": { + "type": "number", + "path": "n", + "boundaries": [0, 25], + "bogus": 1, + } + } + } + } + } + ], + error_code=SEARCH_EXECUTOR_ERROR, + msg="$searchMeta should reject an unrecognized field in a facet definition", + ), + StageTestCase( + "facet_collector_unknown_field", + pipeline=[ + { + "$searchMeta": { + "facet": { + "facets": {"nf": {"type": "number", "path": "n", "boundaries": [0, 25]}}, + "bogus": 1, + } + } + } + ], + error_code=SEARCH_EXECUTOR_ERROR, + msg="$searchMeta should reject an unrecognized field at the facet collector level", + ), + StageTestCase( + "facet_collector_count_inside", + pipeline=[ + { + "$searchMeta": { + "facet": { + "facets": {"nf": {"type": "number", "path": "n", "boundaries": [0, 25]}}, + "count": {"type": "total"}, + } + } + } + ], + error_code=SEARCH_EXECUTOR_ERROR, + msg="$searchMeta should 
reject a count modifier placed inside the facet collector", + ), +] + +SEARCHMETA_SPEC_FACET_ERROR_TESTS: list[StageTestCase] = ( + SEARCHMETA_FACET_VALUE_TYPE_ERROR_TESTS + + SEARCHMETA_FACETS_REQUIRED_ERROR_TESTS + + SEARCHMETA_FACETS_TYPE_ERROR_TESTS + + SEARCHMETA_FACETS_EMPTY_ERROR_TESTS + + SEARCHMETA_FACET_OPERATOR_EMPTY_ERROR_TESTS + + SEARCHMETA_FACET_DEF_REQUIRED_ERROR_TESTS + + SEARCHMETA_FACET_DEF_TYPE_VALUE_ERROR_TESTS + + SEARCHMETA_FACET_DEF_TYPE_ERROR_TESTS + + SEARCHMETA_FACET_UNKNOWN_FIELD_ERROR_TESTS +) + + +@pytest.mark.aggregate +@pytest.mark.parametrize("test_case", pytest_params(SEARCHMETA_SPEC_FACET_ERROR_TESTS)) +def test_searchMeta_spec_facet_errors(search_collection, test_case: StageTestCase): + """Test $searchMeta facet collector and definition spec errors.""" + result = execute_command( + search_collection, + { + "aggregate": search_collection.name, + "pipeline": test_case.pipeline, + "cursor": {}, + }, + ) + assertResult( + result, + error_code=test_case.error_code, + msg=test_case.msg, + ) diff --git a/documentdb_tests/compatibility/tests/core/operator/stages/searchMeta/test_searchMeta_spec_operator_errors.py b/documentdb_tests/compatibility/tests/core/operator/stages/searchMeta/test_searchMeta_spec_operator_errors.py new file mode 100644 index 000000000..afbe9b06f --- /dev/null +++ b/documentdb_tests/compatibility/tests/core/operator/stages/searchMeta/test_searchMeta_spec_operator_errors.py @@ -0,0 +1,234 @@ +"""Tests for $searchMeta operator/collector presence and index spec errors.""" + +from __future__ import annotations + +from collections.abc import Iterator +from datetime import datetime, timezone + +import pytest +from bson import ( + Binary, + Code, + Int64, + MaxKey, + MinKey, + ObjectId, + Regex, + Timestamp, +) +from pymongo.collection import Collection + +from documentdb_tests.compatibility.tests.core.operator.stages.searchMeta.utils.searchMeta_common import ( # noqa: E501 + open_search_collection, +) +from 
documentdb_tests.compatibility.tests.core.operator.stages.utils.stage_test_case import ( + StageTestCase, +) +from documentdb_tests.framework.assertions import assertResult +from documentdb_tests.framework.error_codes import SEARCH_EXECUTOR_ERROR +from documentdb_tests.framework.executor import execute_command +from documentdb_tests.framework.parametrize import pytest_params +from documentdb_tests.framework.test_constants import DECIMAL128_ZERO + +pytestmark = pytest.mark.requires(search=True) + + +@pytest.fixture(scope="module") +def search_collection(engine_client, worker_id) -> Iterator[Collection]: + """Module-scoped metadata search collection (default + alt_idx indexes).""" + with open_search_collection(engine_client, worker_id, f"{__name__}::search_collection") as coll: + yield coll + + +# Property [Operator Value Not A Document]: a present operator-key value that is +# not a document is rejected. +SEARCHMETA_OPERATOR_VALUE_TYPE_ERROR_TESTS: list[StageTestCase] = [ + StageTestCase( + f"operator_value_{tid}", + pipeline=[{"$searchMeta": {"text": val}}], + error_code=SEARCH_EXECUTOR_ERROR, + msg=f"$searchMeta should reject a {tid} operator value as not a document", + ) + for tid, val in [ + ("string", "x"), + ("int32", 42), + ("int64", Int64(1)), + ("double", 3.14), + ("decimal128", DECIMAL128_ZERO), + ("bool", True), + ("array", [1, 2]), + ("objectid", ObjectId("507f1f77bcf86cd799439011")), + ("datetime", datetime(2024, 1, 1, tzinfo=timezone.utc)), + ("timestamp", Timestamp(1, 1)), + ("binary", Binary(b"\x01\x02\x03")), + ("regex", Regex(".*", "i")), + ("code", Code("function(){}")), + ("minkey", MinKey()), + ("maxkey", MaxKey()), + ] +] + +# Property [No Operator Or Collector Present]: a spec that the server reads as +# having no recognized operator and no collector is rejected. 
SEARCHMETA_NO_OPERATOR_ERROR_TESTS: list[StageTestCase] = [
    StageTestCase(
        case_id,
        pipeline=[{"$searchMeta": spec}],
        error_code=SEARCH_EXECUTOR_ERROR,
        msg=reason,
    )
    # Each row: test id, the complete $searchMeta spec, and the assertion message.
    for case_id, spec, reason in (
        (
            "no_operator_unknown_name",
            {"unknownop": {"query": "quick", "path": "title"}},
            "$searchMeta should reject an unrecognized operator name as no operator present",
        ),
        (
            "no_operator_empty_spec",
            {},
            "$searchMeta should reject an empty spec as no operator present",
        ),
        (
            "no_operator_modifier_only",
            {"count": {"type": "total"}},
            "$searchMeta should reject a spec with only modifiers as no operator present",
        ),
        (
            "no_operator_capitalized_key",
            {"Text": {"query": "quick", "path": "title"}},
            "$searchMeta should treat a capitalized operator key as unrecognized, not present",
        ),
        (
            # The trailing space in the key is deliberate.
            "no_operator_untrimmed_key",
            {"text ": {"query": "quick", "path": "title"}},
            "$searchMeta should treat an untrimmed operator key as unrecognized, not present",
        ),
        (
            "no_operator_value_null",
            {"text": None},
            "$searchMeta should treat a null operator value as field-absent, not present",
        ),
        (
            "no_operator_facet_null",
            {"facet": None},
            "$searchMeta should treat a null facet value as field-absent, not present",
        ),
    )
]

# Property [Conflicting Operators And Collector]: a spec carrying more than one
# query specifier (a top-level operator plus a facet collector, or two top-level
# operators) is rejected.
SEARCHMETA_OPERATOR_CONFLICT_ERROR_TESTS: list[StageTestCase] = [
    StageTestCase(
        case_id,
        pipeline=[{"$searchMeta": spec}],
        error_code=SEARCH_EXECUTOR_ERROR,
        msg=reason,
    )
    # Each row: test id, a spec holding two query specifiers, and the assertion
    # message. The "text" operator comes first in both specs.
    for case_id, spec, reason in (
        (
            "conflict_operator_and_collector",
            {
                "text": {"query": "quick", "path": "title"},
                "facet": {
                    "facets": {"nf": {"type": "number", "path": "n", "boundaries": [0, 25]}}
                },
            },
            "$searchMeta should reject a spec carrying both an operator and a facet collector",
        ),
        (
            "conflict_two_operators",
            {
                "text": {"query": "quick", "path": "title"},
                "equals": {"path": "n", "value": 1},
            },
            "$searchMeta should reject a spec carrying two top-level operators",
        ),
    )
]

# Property [Index Not A String]: an index value that is not a string and not
# null is rejected.
SEARCHMETA_INDEX_TYPE_ERROR_TESTS: list[StageTestCase] = [
    StageTestCase(
        f"index_not_string_{tid}",
        pipeline=[
            {
                "$searchMeta": {
                    "text": {"query": "quick", "path": "title"},
                    "index": non_string,
                }
            }
        ],
        error_code=SEARCH_EXECUTOR_ERROR,
        msg=f"$searchMeta should reject a {tid} index value as not a string",
    )
    # One sample value per non-string, non-null BSON type, keyed by test id.
    for tid, non_string in (
        ("int32", 42),
        ("int64", Int64(1)),
        ("double", 3.14),
        ("decimal128", DECIMAL128_ZERO),
        ("bool", True),
        ("array", [1, 2]),
        ("object", {"a": 1}),
        ("objectid", ObjectId("507f1f77bcf86cd799439011")),
        ("datetime", datetime(2024, 1, 1, tzinfo=timezone.utc)),
        ("timestamp", Timestamp(1, 1)),
        ("binary", Binary(b"\x01\x02\x03")),
        ("regex", Regex(".*", "i")),
        ("code", Code("function(){}")),
        ("minkey", MinKey()),
        ("maxkey", MaxKey()),
    )
]

# Property [Index Empty String]: an empty-string index is rejected.
+SEARCHMETA_INDEX_EMPTY_ERROR_TESTS: list[StageTestCase] = [ + StageTestCase( + "index_empty_string", + pipeline=[{"$searchMeta": {"text": {"query": "quick", "path": "title"}, "index": ""}}], + error_code=SEARCH_EXECUTOR_ERROR, + msg="$searchMeta should reject an empty-string index value", + ), +] + +# Property [Unknown Top-Level Option]: a top-level field that is not a recognized +# option is rejected; matching is exact, case-sensitive, and not +# whitespace-trimmed. ($search output options like sort/highlight are accepted as +# no-ops, so they are not unrecognized and not asserted here.) +SEARCHMETA_UNKNOWN_OPTION_ERROR_TESTS: list[StageTestCase] = [ + StageTestCase( + f"unknown_option_{suffix}", + pipeline=[{"$searchMeta": {"text": {"query": "quick", "path": "title"}, name: value}}], + error_code=SEARCH_EXECUTOR_ERROR, + msg=f"$searchMeta should reject a {suffix} top-level option as an unrecognized field", + ) + for name, value, suffix in [ + ("bogus", 1, "garbage_name"), + ("Index", "default", "capitalized_index"), + ("index ", "default", "trailing_space_index"), + ] +] + +SEARCHMETA_SPEC_OPERATOR_ERROR_TESTS: list[StageTestCase] = ( + SEARCHMETA_OPERATOR_VALUE_TYPE_ERROR_TESTS + + SEARCHMETA_NO_OPERATOR_ERROR_TESTS + + SEARCHMETA_OPERATOR_CONFLICT_ERROR_TESTS + + SEARCHMETA_INDEX_TYPE_ERROR_TESTS + + SEARCHMETA_INDEX_EMPTY_ERROR_TESTS + + SEARCHMETA_UNKNOWN_OPTION_ERROR_TESTS +) + + +@pytest.mark.aggregate +@pytest.mark.parametrize("test_case", pytest_params(SEARCHMETA_SPEC_OPERATOR_ERROR_TESTS)) +def test_searchMeta_spec_operator_errors(search_collection, test_case: StageTestCase): + """Test $searchMeta operator/collector presence and index spec errors.""" + result = execute_command( + search_collection, + { + "aggregate": search_collection.name, + "pipeline": test_case.pipeline, + "cursor": {}, + }, + ) + assertResult( + result, + error_code=test_case.error_code, + msg=test_case.msg, + ) diff --git 
a/documentdb_tests/compatibility/tests/core/operator/stages/searchMeta/test_searchMeta_string_facet.py b/documentdb_tests/compatibility/tests/core/operator/stages/searchMeta/test_searchMeta_string_facet.py new file mode 100644 index 000000000..28f6eeb69 --- /dev/null +++ b/documentdb_tests/compatibility/tests/core/operator/stages/searchMeta/test_searchMeta_string_facet.py @@ -0,0 +1,162 @@ +"""Tests for $searchMeta string-facet bucket selection and ordering.""" + +from __future__ import annotations + +from collections.abc import Iterator + +import pytest +from bson import Int64 +from pymongo.collection import Collection + +from documentdb_tests.compatibility.tests.core.operator.stages.searchMeta.utils.searchMeta_common import ( # noqa: E501 + build_collection, +) +from documentdb_tests.compatibility.tests.core.operator.stages.utils.stage_test_case import ( + StageTestCase, +) +from documentdb_tests.framework.assertions import assertResult +from documentdb_tests.framework.executor import execute_command +from documentdb_tests.framework.parametrize import pytest_params +from documentdb_tests.framework.test_constants import INT64_ZERO + +pytestmark = pytest.mark.requires(search=True) + + +@pytest.fixture(scope="module") +def string_facet_collection(engine_client, worker_id) -> Iterator[Collection]: + """Indexed collection with a token mapping for string faceting. + + String facets are rejected under a plain dynamic mapping, so this collection + declares an explicit token mapping. Category counts (a=3, b=2, c=1) make + truncation and ordering by count observable. 
+ """ + with build_collection( + engine_client, + worker_id, + f"{__name__}::string_facet_collection", + "searchmeta_string_facet", + [ + {"_id": 1, "cat": "a"}, + {"_id": 2, "cat": "b"}, + {"_id": 3, "cat": "a"}, + {"_id": 4, "cat": "c"}, + {"_id": 5, "cat": "b"}, + {"_id": 6, "cat": "a"}, + ], + [ + { + "name": "default", + "definition": { + "mappings": {"dynamic": True, "fields": {"cat": [{"type": "token"}]}} + }, + } + ], + ) as coll: + yield coll + + +# Property [Facet String NumBuckets]: numBuckets greater than the distinct value +# count returns all buckets, while fewer truncates to the top-N values by count. +SEARCHMETA_STRING_NUMBUCKETS_TESTS: list[StageTestCase] = [ + StageTestCase( + "string_numbuckets_all", + pipeline=[ + { + "$searchMeta": { + "facet": {"facets": {"sf": {"type": "string", "path": "cat", "numBuckets": 10}}} + } + } + ], + expected=[ + { + "count": {"lowerBound": Int64(6)}, + "facet": { + "sf": { + "buckets": [ + {"_id": "a", "count": Int64(3)}, + {"_id": "b", "count": Int64(2)}, + {"_id": "c", "count": Int64(1)}, + ] + } + }, + } + ], + msg="$searchMeta string facet should return all buckets when numBuckets exceeds the " + "distinct value count", + ), + StageTestCase( + "string_numbuckets_topn", + pipeline=[ + { + "$searchMeta": { + "facet": {"facets": {"sf": {"type": "string", "path": "cat", "numBuckets": 2}}} + } + } + ], + expected=[ + { + "count": {"lowerBound": Int64(6)}, + "facet": { + "sf": { + "buckets": [ + {"_id": "a", "count": Int64(3)}, + {"_id": "b", "count": Int64(2)}, + ] + } + }, + } + ], + msg="$searchMeta string facet should truncate to the top-N values by count when " + "numBuckets is below the distinct value count", + ), +] + +# Property [Facet Empty Buckets]: a no-match query yields an empty buckets array +# for a string facet, which emits only buckets backed by matching values. 
+SEARCHMETA_STRING_EMPTY_TESTS: list[StageTestCase] = [ + StageTestCase( + "string_facet_no_match_empty_buckets", + pipeline=[ + { + "$searchMeta": { + "facet": { + "operator": {"text": {"query": "nonexistentxyz", "path": "cat"}}, + "facets": {"sf": {"type": "string", "path": "cat"}}, + } + } + } + ], + expected=[ + { + "count": {"lowerBound": INT64_ZERO}, + "facet": {"sf": {"buckets": []}}, + } + ], + msg="$searchMeta string facet should return an empty buckets array when no document " + "matches the query", + ), +] + +SEARCHMETA_STRING_FACET_TESTS: list[StageTestCase] = ( + SEARCHMETA_STRING_NUMBUCKETS_TESTS + SEARCHMETA_STRING_EMPTY_TESTS +) + + +@pytest.mark.aggregate +@pytest.mark.parametrize("test_case", pytest_params(SEARCHMETA_STRING_FACET_TESTS)) +def test_searchMeta_string_facet(string_facet_collection, test_case: StageTestCase): + """Test $searchMeta string facet bucket selection and ordering.""" + result = execute_command( + string_facet_collection, + { + "aggregate": string_facet_collection.name, + "pipeline": test_case.pipeline, + "cursor": {}, + }, + ) + assertResult( + result, + expected=test_case.expected, + error_code=test_case.error_code, + msg=test_case.msg, + ) diff --git a/documentdb_tests/compatibility/tests/core/operator/stages/searchMeta/test_searchMeta_threshold_bounds.py b/documentdb_tests/compatibility/tests/core/operator/stages/searchMeta/test_searchMeta_threshold_bounds.py new file mode 100644 index 000000000..b142a13c2 --- /dev/null +++ b/documentdb_tests/compatibility/tests/core/operator/stages/searchMeta/test_searchMeta_threshold_bounds.py @@ -0,0 +1,113 @@ +"""Tests for $searchMeta count behavior above the exact-count threshold.""" + +from __future__ import annotations + +from collections.abc import Iterator + +import pytest +from bson import Int64 +from pymongo.collection import Collection + +from documentdb_tests.compatibility.tests.core.operator.stages.searchMeta.utils.searchMeta_common import ( # noqa: E501 + LARGE_MATCH_COUNT, 
+ build_collection, +) +from documentdb_tests.compatibility.tests.core.operator.stages.utils.stage_test_case import ( + StageTestCase, +) +from documentdb_tests.framework.assertions import assertResult +from documentdb_tests.framework.executor import execute_command +from documentdb_tests.framework.parametrize import pytest_params +from documentdb_tests.framework.property_checks import Gte, Lte + +pytestmark = pytest.mark.requires(search=True) + + +@pytest.fixture(scope="module") +def large_search_collection(engine_client, worker_id) -> Iterator[Collection]: + """Large indexed collection where every document matches one query.""" + with build_collection( + engine_client, + worker_id, + f"{__name__}::large_search_collection", + "searchmeta_large", + [{"_id": i, "title": "widget"} for i in range(LARGE_MATCH_COUNT)], + [{"name": "default", "definition": {"mappings": {"dynamic": True}}}], + ) as coll: + yield coll + + +# Property [Count Type Above Threshold]: when the match count exceeds the +# threshold, count.type lowerBound returns a value between the threshold and the +# match count, while count.type total stays exact. 
@pytest.mark.aggregate
@pytest.mark.parametrize("test_case", pytest_params(SEARCHMETA_THRESHOLD_BOUND_TESTS))
def test_searchMeta_threshold_bounds(large_search_collection, test_case: StageTestCase):
    """Run one above-threshold count case against the large collection."""
    # Build the aggregate command up front so the call site stays short.
    command = {
        "aggregate": large_search_collection.name,
        "pipeline": test_case.pipeline,
        "cursor": {},
    }
    outcome = execute_command(large_search_collection, command)
    assertResult(
        outcome,
        expected=test_case.expected,
        error_code=test_case.error_code,
        msg=test_case.msg,
    )
def wait_for_ready(
    db: Database,
    name: str,
    *,
    timeout: float = 60.0,
    poll_interval: float = 1.0,
) -> None:
    """Block until every search index on the collection reports READY.

    Polls ``listSearchIndexes`` for collection ``name`` until the reply lists
    at least one index and every listed index has status ``READY``.

    ``timeout`` is the number of seconds to keep polling and ``poll_interval``
    the number of seconds to sleep between polls; the defaults keep the
    original 60-second / 1-second behavior. The index state is always
    inspected at least once, even with ``timeout=0``.

    Raises ``TimeoutError`` naming the last observed index statuses when the
    indexes are not all READY by the deadline.
    """
    # A search index build is asynchronous; allow generous time to reach READY.
    deadline = time.monotonic() + timeout
    while True:
        batch = db.command({"listSearchIndexes": name})["cursor"]["firstBatch"]
        if batch and all(idx.get("status") == "READY" for idx in batch):
            return
        # Test the deadline after polling so every sleep is followed by a
        # final poll and a zero timeout still checks the state once.
        if time.monotonic() >= deadline:
            break
        time.sleep(poll_interval)
    # Surface what the indexes were doing so a timeout is diagnosable.
    statuses = {idx.get("name"): idx.get("status") for idx in batch}
    raise TimeoutError(
        f"search index on {name!r} did not reach READY within {timeout}s; "
        f"last observed statuses: {statuses}"
    )
+ """ + with build_collection( + engine_client, + worker_id, + tag, + "searchmeta", + SEARCH_DOCS, + [ + {"name": "default", "definition": {"mappings": {"dynamic": True}}}, + {"name": "alt_idx", "definition": {"mappings": {"dynamic": True}}}, + ], + ) as coll: + yield coll diff --git a/documentdb_tests/compatibility/tests/core/operator/stages/test_stages_position_searchMeta.py b/documentdb_tests/compatibility/tests/core/operator/stages/test_stages_position_searchMeta.py new file mode 100644 index 000000000..674070878 --- /dev/null +++ b/documentdb_tests/compatibility/tests/core/operator/stages/test_stages_position_searchMeta.py @@ -0,0 +1,300 @@ +"""Tests for $searchMeta pipeline position constraints and stage combinations.""" + +from __future__ import annotations + +from collections.abc import Iterator + +import pytest +from bson import Int64 +from pymongo.collection import Collection + +from documentdb_tests.compatibility.tests.core.operator.stages.searchMeta.utils.searchMeta_common import ( # noqa: E501 + SEARCH_DOCS, + CollectionFixtureTestCase, + build_collection, +) +from documentdb_tests.framework.assertions import assertResult +from documentdb_tests.framework.error_codes import ( + FACET_PIPELINE_INVALID_STAGE_ERROR, + NOT_FIRST_STAGE_ERROR, +) +from documentdb_tests.framework.executor import execute_command +from documentdb_tests.framework.parametrize import pytest_params + +pytestmark = pytest.mark.requires(search=True) + + +@pytest.fixture(scope="module") +def noindex_collection(engine_client, worker_id) -> Iterator[Collection]: + """Provide a populated collection that has no search index. + + Position and context errors are parse-time rejections, so this collection + deliberately omits the search index to confirm the position check fires + regardless of index state and to let sub-pipeline cases self-reference an + existing collection. 
@pytest.fixture(scope="module")
def search_collection(engine_client, worker_id) -> Iterator[Collection]:
    """Yield a populated collection whose dynamic search index has been built.

    The index build is asynchronous and costly, so the collection is set up a
    single time per module; the placement cases only read from it and can
    therefore share it.
    """
    tag = f"{__name__}::search_collection"
    index_specs = [{"name": "default", "definition": {"mappings": {"dynamic": True}}}]
    with build_collection(
        engine_client, worker_id, tag, "searchmeta", SEARCH_DOCS, index_specs
    ) as collection:
        yield collection
# Success cases: each runs against the fixture named by ``collection_fixture``.
# The expected lowerBound of 3 is the number of SEARCH_DOCS titles that contain
# "quick".
SEARCHMETA_PLACEMENT_TESTS: list[CollectionFixtureTestCase] = [
    CollectionFixtureTestCase(
        "placement_trailing_limit",
        collection_fixture="search_collection",
        pipeline=[
            {"$searchMeta": {"text": {"query": "quick", "path": "title"}}},
            {"$limit": 1},
        ],
        expected=[{"count": {"lowerBound": Int64(3)}}],
        msg="$searchMeta should permit a single trailing $limit and pass the one metadata "
        "document through",
    ),
    CollectionFixtureTestCase(
        "placement_unionwith_subpipeline",
        collection_fixture="search_collection",
        pipeline=[
            {"$searchMeta": {"text": {"query": "quick", "path": "title"}}},
            {
                "$unionWith": {
                    # "searchmeta" is the collection name the search_collection
                    # fixture creates, so the sub-pipeline self-references it.
                    "coll": "searchmeta",
                    "pipeline": [{"$searchMeta": {"text": {"query": "quick", "path": "title"}}}],
                }
            },
        ],
        # One metadata document from the main pipeline, one from the union.
        expected=[
            {"count": {"lowerBound": Int64(3)}},
            {"count": {"lowerBound": Int64(3)}},
        ],
        msg="$searchMeta should be allowed as the first stage of a $unionWith sub-pipeline "
        "alongside a first-position main $searchMeta",
    ),
    CollectionFixtureTestCase(
        "placement_lookup_subpipeline",
        collection_fixture="search_collection",
        pipeline=[
            {"$searchMeta": {"text": {"query": "quick", "path": "title"}}},
            {
                "$lookup": {
                    # Same self-reference to the fixture's "searchmeta" collection.
                    "from": "searchmeta",
                    "pipeline": [{"$searchMeta": {"text": {"query": "quick", "path": "title"}}}],
                    "as": "meta",
                }
            },
        ],
        expected=[
            {
                "count": {"lowerBound": Int64(3)},
                "meta": [{"count": {"lowerBound": Int64(3)}}],
            }
        ],
        msg="$searchMeta should be allowed as the first stage of a $lookup sub-pipeline and "
        "return the metadata document per joined row",
    ),
]

# Property [Stage Position and Context Errors]: $searchMeta is rejected with
# NOT_FIRST_STAGE_ERROR whenever it is not the first stage of its main pipeline
# or of a $unionWith/$lookup sub-pipeline, and with
# FACET_PIPELINE_INVALID_STAGE_ERROR inside a $facet sub-pipeline; the position
# check fires regardless of index state.
+SEARCHMETA_POSITION_ERROR_TESTS: list[CollectionFixtureTestCase] = [ + CollectionFixtureTestCase( + "error_after_match", + collection_fixture="noindex_collection", + pipeline=[ + {"$match": {"n": {"$gt": 0}}}, + {"$searchMeta": {"text": {"query": "quick", "path": "title"}}}, + ], + error_code=NOT_FIRST_STAGE_ERROR, + msg="$searchMeta after a $match should be rejected as not the first stage", + ), + CollectionFixtureTestCase( + "error_after_project", + collection_fixture="noindex_collection", + pipeline=[ + {"$project": {"title": 1}}, + {"$searchMeta": {"text": {"query": "quick", "path": "title"}}}, + ], + error_code=NOT_FIRST_STAGE_ERROR, + msg="$searchMeta after a $project should be rejected as not the first stage", + ), + CollectionFixtureTestCase( + "error_after_limit", + collection_fixture="noindex_collection", + pipeline=[ + {"$limit": 1}, + {"$searchMeta": {"text": {"query": "quick", "path": "title"}}}, + ], + error_code=NOT_FIRST_STAGE_ERROR, + msg="$searchMeta after a $limit should be rejected as not the first stage", + ), + CollectionFixtureTestCase( + "error_after_addfields", + collection_fixture="noindex_collection", + pipeline=[ + {"$addFields": {"m": 1}}, + {"$searchMeta": {"text": {"query": "quick", "path": "title"}}}, + ], + error_code=NOT_FIRST_STAGE_ERROR, + msg="$searchMeta after an $addFields should be rejected as not the first stage", + ), + CollectionFixtureTestCase( + "error_after_empty_match", + collection_fixture="noindex_collection", + pipeline=[ + {"$match": {}}, + {"$searchMeta": {"text": {"query": "quick", "path": "title"}}}, + ], + error_code=NOT_FIRST_STAGE_ERROR, + msg="$searchMeta after an empty $match should still be rejected as not the first " + "stage because the empty $match is not optimized away", + ), + CollectionFixtureTestCase( + "error_second_searchMeta_adjacent", + collection_fixture="noindex_collection", + pipeline=[ + {"$searchMeta": {"text": {"query": "quick", "path": "title"}}}, + {"$searchMeta": {"text": {"query": 
"quick", "path": "title"}}}, + ], + error_code=NOT_FIRST_STAGE_ERROR, + msg="A second adjacent $searchMeta should be rejected as not the first stage", + ), + CollectionFixtureTestCase( + "error_second_searchMeta_separated", + collection_fixture="noindex_collection", + pipeline=[ + {"$searchMeta": {"text": {"query": "quick", "path": "title"}}}, + {"$limit": 1}, + {"$searchMeta": {"text": {"query": "quick", "path": "title"}}}, + ], + error_code=NOT_FIRST_STAGE_ERROR, + msg="A second $searchMeta separated from the first should be rejected as not the " + "first stage", + ), + CollectionFixtureTestCase( + "error_inside_facet_first", + collection_fixture="noindex_collection", + pipeline=[ + { + "$facet": { + "meta": [ + {"$searchMeta": {"text": {"query": "quick", "path": "title"}}}, + ], + } + }, + ], + error_code=FACET_PIPELINE_INVALID_STAGE_ERROR, + msg="$searchMeta as the first stage of a $facet sub-pipeline should be rejected", + ), + CollectionFixtureTestCase( + "error_inside_facet_not_first", + collection_fixture="noindex_collection", + pipeline=[ + { + "$facet": { + "meta": [ + {"$limit": 1}, + {"$searchMeta": {"text": {"query": "quick", "path": "title"}}}, + ], + } + }, + ], + error_code=FACET_PIPELINE_INVALID_STAGE_ERROR, + msg="$searchMeta inside a $facet sub-pipeline should be rejected regardless of its " + "position within the sub-pipeline", + ), + CollectionFixtureTestCase( + "error_unionwith_subpipeline_not_first", + collection_fixture="noindex_collection", + pipeline=[ + {"$searchMeta": {"text": {"query": "quick", "path": "title"}}}, + { + "$unionWith": { + "coll": "searchmeta_noindex", + "pipeline": [ + {"$match": {}}, + {"$searchMeta": {"text": {"query": "quick", "path": "title"}}}, + ], + } + }, + ], + error_code=NOT_FIRST_STAGE_ERROR, + msg="$searchMeta that is not first inside a $unionWith sub-pipeline should be " + "rejected as not the first stage", + ), + CollectionFixtureTestCase( + "error_lookup_subpipeline_not_first", + 
class Lte(Check):
    """Assert that the field is less than or equal to a value."""

    def __init__(self, maximum: Any) -> None:
        # Inclusive upper bound the checked value must not exceed.
        self.maximum = maximum

    def check(self, value: Any, path: str) -> str | None:
        """Return a failure message when ``value`` exceeds the maximum, else None.

        A missing field and a value that cannot be ordered against the maximum
        are both reported as failures rather than raised, so a malformed reply
        produces a check message instead of a ``TypeError``.
        """
        if value is _FIELD_ABSENT:
            return f"expected '{path}' <= {self.maximum!r}, but field is missing"
        try:
            exceeds = value > self.maximum
        except TypeError:
            # e.g. the field is present but None or a string.
            return (
                f"expected '{path}' <= {self.maximum!r}, got incomparable value {value!r}"
            )
        if exceeds:
            return f"expected '{path}' <= {self.maximum!r}, got {value!r}"
        return None

    def __repr__(self) -> str:
        return f"{type(self).__name__}({self.maximum!r})"