From 351ce663e83f3eeecce8e689c59b3bea83125577 Mon Sep 17 00:00:00 2001
From: fabrizzio-dotCMS
Date: Tue, 23 Jun 2026 10:56:22 -0600
Subject: [PATCH 1/6] fix(sitesearch): vendor-neutral aggregation abstraction
(#35786)
Decouple SiteSearchAPI/SiteSearchWebAPI from Elasticsearch aggregation
types so Site Search can be served by OpenSearch in Phase 3.
- Reuse the existing neutral com.dotcms.content.index.domain.Aggregation
/ AggregationBucket DTOs (from #36026) instead of a new IndexAggregation
- Add neutral DotSearchException (unchecked) to replace ElasticsearchException
on the public API surface
- SiteSearchAPI: drop org.elasticsearch.* imports; neutral Aggregation
return type; createSiteSearchIndex throws DotSearchException
- SiteSearchWebAPI: remove InternalDateHistogram/StringTerms/Bucket casts
and the Joda DateTime import; getFacets distinguishes histogram vs terms
by aggregation type and feeds the legacy wrappers neutral buckets
- ESSiteSearchAPI: adapt ES results via Aggregation.from(); ES exception
throws -> DotSearchException
- Add date/numeric histogram support to the neutral Aggregation ES factory
(also fixes a latent CCE: the old getFacets cast the histogram key to
Joda DateTime, which is a java.time.ZonedDateTime in ES 7.x)
OSSiteSearchAPI is deferred to #34609 (not yet in the codebase);
Aggregation.fromOS() is already in place for it.
Co-Authored-By: Claude Opus 4.8 (1M context)
---
.../sitesearch/ESSiteSearchAPI.java | 19 ++++----
.../content/index/domain/Aggregation.java | 6 +++
.../index/domain/AggregationBucket.java | 28 +++++++++++
.../index/domain/DotSearchException.java | 32 +++++++++++++
.../sitesearch/business/SiteSearchAPI.java | 6 +--
.../sitesearch/viewtool/SiteSearchWebAPI.java | 47 +++++++++++--------
6 files changed, 106 insertions(+), 32 deletions(-)
create mode 100644 dotCMS/src/main/java/com/dotcms/content/index/domain/DotSearchException.java
diff --git a/dotCMS/src/enterprise/java/com/dotcms/enterprise/publishing/sitesearch/ESSiteSearchAPI.java b/dotCMS/src/enterprise/java/com/dotcms/enterprise/publishing/sitesearch/ESSiteSearchAPI.java
index 09f1f54c96b7..b8b0dc3cd8ce 100644
--- a/dotCMS/src/enterprise/java/com/dotcms/enterprise/publishing/sitesearch/ESSiteSearchAPI.java
+++ b/dotCMS/src/enterprise/java/com/dotcms/enterprise/publishing/sitesearch/ESSiteSearchAPI.java
@@ -14,6 +14,8 @@
import com.dotcms.content.elasticsearch.business.*;
import com.dotcms.content.elasticsearch.util.RestHighLevelClientProvider;
import com.dotcms.content.index.IndexAPI;
+import com.dotcms.content.index.domain.Aggregation;
+import com.dotcms.content.index.domain.DotSearchException;
import com.dotcms.enterprise.LicenseUtil;
import com.dotcms.enterprise.license.LicenseLevel;
import com.dotcms.enterprise.priv.util.SearchSourceBuilderUtil;
@@ -64,7 +66,6 @@
import org.elasticsearch.index.query.QueryBuilders;
import org.elasticsearch.search.SearchHit;
import org.elasticsearch.search.SearchHits;
-import org.elasticsearch.search.aggregations.Aggregation;
import org.elasticsearch.search.builder.SearchSourceBuilder;
import org.elasticsearch.search.fetch.subphase.highlight.HighlightBuilder;
import org.elasticsearch.search.fetch.subphase.highlight.HighlightField;
@@ -351,7 +352,7 @@ public void deactivateIndex(String indexName) throws DotDataException, IOExcepti
}
@Override
- public synchronized boolean createSiteSearchIndex(String indexName, String alias, int shards) throws ElasticsearchException, IOException {
+ public synchronized boolean createSiteSearchIndex(String indexName, String alias, int shards) throws DotSearchException, IOException {
if(indexName==null){
return false;
}
@@ -379,7 +380,7 @@ public synchronized boolean createSiteSearchIndex(String indexName, String alias
}
if(i++ > 300){
- throw new ElasticsearchException("index timed out creating");
+ throw new DotSearchException("index timed out creating");
}
}
@@ -634,7 +635,7 @@ public Map getAggregations ( String indexName, String query
}
if ( indexName == null || !IndexType.SITE_SEARCH.is(indexName) ) {
- throw new ElasticsearchException( indexName + " is not a sitesearch index or alias" );
+ throw new DotSearchException( indexName + " is not a sitesearch index or alias" );
}
//https://github.com/elasticsearch/elasticsearch/issues/2980
@@ -648,10 +649,10 @@ public Map getAggregations ( String indexName, String query
.timeout(TimeValue.timeValueMillis(INDEX_OPERATIONS_TIMEOUT_IN_MS)));
final SearchResponse response = client.search(request, RequestOptions.DEFAULT);
- return response.getAggregations().asMap();
+ return Aggregation.from(response.getAggregations());
} catch ( ElasticsearchException | IOException e ) {
Logger.error( this.getClass(), "Error getting aggregations for query.\n" + e.getMessage(), e );
- throw new ElasticsearchException( "Error getting aggregations for query.\n" + e.getMessage(), e );
+ throw new DotSearchException( "Error getting aggregations for query.\n" + e.getMessage(), e );
}
}
@@ -669,7 +670,7 @@ public Map getFacets ( String indexName, String query ) thr
}
if ( indexName == null || !IndexType.SITE_SEARCH.is(indexName ) ) {
- throw new ElasticsearchException( indexName + " is not a sitesearch index or alias" );
+ throw new DotSearchException( indexName + " is not a sitesearch index or alias" );
}
//https://github.com/elasticsearch/elasticsearch/issues/2980
@@ -683,10 +684,10 @@ public Map getFacets ( String indexName, String query ) thr
.timeout(TimeValue.timeValueMillis(INDEX_OPERATIONS_TIMEOUT_IN_MS)));
final SearchResponse response = client.search(request, RequestOptions.DEFAULT);
- return response.getAggregations().asMap();
+ return Aggregation.from(response.getAggregations());
} catch ( ElasticsearchException | IOException e ) {
Logger.error( this.getClass(), "Error getting Facets for query.\n" + e.getMessage(), e );
- throw new ElasticsearchException( "Error getting Facets for query.\n" + e.getMessage(), e );
+ throw new DotSearchException( "Error getting Facets for query.\n" + e.getMessage(), e );
}
}
diff --git a/dotCMS/src/main/java/com/dotcms/content/index/domain/Aggregation.java b/dotCMS/src/main/java/com/dotcms/content/index/domain/Aggregation.java
index 48db102f1ed9..e94984f6ed3c 100644
--- a/dotCMS/src/main/java/com/dotcms/content/index/domain/Aggregation.java
+++ b/dotCMS/src/main/java/com/dotcms/content/index/domain/Aggregation.java
@@ -88,6 +88,12 @@ private static Aggregation fromSingle(final org.elasticsearch.search.aggregation
builder.buckets(terms.getBuckets().stream()
.map(AggregationBucket::from)
.collect(Collectors.toList()));
+ } else if (esAgg instanceof org.elasticsearch.search.aggregations.bucket.histogram.Histogram) {
+ final org.elasticsearch.search.aggregations.bucket.histogram.Histogram histogram =
+ (org.elasticsearch.search.aggregations.bucket.histogram.Histogram) esAgg;
+ builder.buckets(histogram.getBuckets().stream()
+ .map(AggregationBucket::fromHistogram)
+ .collect(Collectors.toList()));
} else if (esAgg instanceof org.elasticsearch.search.aggregations.metrics.TopHits) {
final org.elasticsearch.search.aggregations.metrics.TopHits topHits =
(org.elasticsearch.search.aggregations.metrics.TopHits) esAgg;
diff --git a/dotCMS/src/main/java/com/dotcms/content/index/domain/AggregationBucket.java b/dotCMS/src/main/java/com/dotcms/content/index/domain/AggregationBucket.java
index c8904dcc8c34..79929696d59d 100644
--- a/dotCMS/src/main/java/com/dotcms/content/index/domain/AggregationBucket.java
+++ b/dotCMS/src/main/java/com/dotcms/content/index/domain/AggregationBucket.java
@@ -92,6 +92,34 @@ public static AggregationBucket from(
.build();
}
+    /**
+     * Builds a neutral bucket from an Elasticsearch histogram bucket (date or numeric), carrying
+     * over its document count and its sub-aggregations.
+     *
+     * <p>The bucket key is passed through {@code histogramKey(Object)} so that
+     * {@link #getKeyAsNumber()} can read it back as a number. A date-histogram key is a
+     * {@code java.time.ZonedDateTime} in ES 7.x, not a number, so it is converted to epoch-millis
+     * here rather than taken from {@code getKeyAsString()} (which yields a formatted date).
+     *
+     * @param esBucket the Elasticsearch histogram bucket to adapt
+     * @return the vendor-neutral bucket
+     */
+    public static AggregationBucket fromHistogram(
+            final org.elasticsearch.search.aggregations.bucket.histogram.Histogram.Bucket esBucket) {
+        final String normalizedKey = histogramKey(esBucket.getKey());
+        final long documents = esBucket.getDocCount();
+        return builder()
+                .key(normalizedKey)
+                .docCount(documents)
+                .subAggregations(Aggregation.from(esBucket.getAggregations()))
+                .build();
+    }
+
+    /**
+     * Normalizes a histogram bucket key to a numeric String ({@link #getKeyAsNumber()}-friendly).
+     *
+     * <ul>
+     *   <li>{@code java.time.ZonedDateTime} (date histogram) becomes its epoch-millis value.</li>
+     *   <li>A whole-valued {@link Number} becomes its integer form (e.g. {@code 2.0 -> "2"}).</li>
+     *   <li>A fractional {@code Double}/{@code Float} keeps its decimal form
+     *       (e.g. {@code 1.5 -> "1.5"}) instead of being truncated, so buckets of a numeric
+     *       histogram with a fractional interval do not collapse onto the same key.</li>
+     *   <li>Anything else falls back to {@code String.valueOf(key)}.</li>
+     * </ul>
+     *
+     * @param key the raw bucket key reported by the engine
+     * @return the normalized key
+     */
+    private static String histogramKey(final Object key) {
+        if (key instanceof java.time.ZonedDateTime) {
+            return String.valueOf(((java.time.ZonedDateTime) key).toInstant().toEpochMilli());
+        }
+        if (key instanceof Number) {
+            if (key instanceof Double || key instanceof Float) {
+                final double value = ((Number) key).doubleValue();
+                // NOTE(review): assumes getKeyAsNumber() can parse a decimal string — confirm.
+                if (Double.isFinite(value) && value != Math.rint(value)) {
+                    return String.valueOf(value);
+                }
+            }
+            return String.valueOf(((Number) key).longValue());
+        }
+        return String.valueOf(key);
+    }
+
// -------------------------------------------------------------------------
// OS factories
// -------------------------------------------------------------------------
diff --git a/dotCMS/src/main/java/com/dotcms/content/index/domain/DotSearchException.java b/dotCMS/src/main/java/com/dotcms/content/index/domain/DotSearchException.java
new file mode 100644
index 000000000000..6a45e5d0186b
--- /dev/null
+++ b/dotCMS/src/main/java/com/dotcms/content/index/domain/DotSearchException.java
@@ -0,0 +1,32 @@
+package com.dotcms.content.index.domain;
+
+import com.dotmarketing.exception.DotRuntimeException;
+
+/**
+ * Vendor-neutral search exception for the index abstraction layer.
+ *
+ * <p>Replaces {@code org.elasticsearch.ElasticsearchException} on the public surface of the
+ * search/site-search APIs so that callers — and the interfaces themselves — no longer couple to
+ * Elasticsearch (or any other engine) types. It is the neutral failure signal raised by both the
+ * Elasticsearch and OpenSearch providers when a search or index operation cannot be completed.
+ *
+ * <p>It extends {@link DotRuntimeException} (and therefore is unchecked) to mirror the unchecked
+ * nature of {@code ElasticsearchException}: existing callers that never declared a {@code catch}
+ * for the vendor exception keep compiling unchanged.
+ */
+public class DotSearchException extends DotRuntimeException {
+
+    private static final long serialVersionUID = 1L;
+
+    /**
+     * @param message description of the failure
+     * @param cause   the underlying (typically vendor-specific) exception
+     */
+    public DotSearchException(final String message, final Throwable cause) {
+        super(message, cause);
+    }
+
+    /**
+     * @param message description of the failure
+     */
+    public DotSearchException(final String message) {
+        super(message);
+    }
+
+    /**
+     * @param cause the underlying (typically vendor-specific) exception
+     */
+    public DotSearchException(final Throwable cause) {
+        super(cause);
+    }
+}
\ No newline at end of file
diff --git a/dotCMS/src/main/java/com/dotmarketing/sitesearch/business/SiteSearchAPI.java b/dotCMS/src/main/java/com/dotmarketing/sitesearch/business/SiteSearchAPI.java
index ac2031f1ac73..7a13c33847b1 100644
--- a/dotCMS/src/main/java/com/dotmarketing/sitesearch/business/SiteSearchAPI.java
+++ b/dotCMS/src/main/java/com/dotmarketing/sitesearch/business/SiteSearchAPI.java
@@ -5,10 +5,10 @@
import java.util.List;
import java.util.Map;
-import org.elasticsearch.ElasticsearchException;
-import org.elasticsearch.search.aggregations.Aggregation;
import org.quartz.SchedulerException;
+import com.dotcms.content.index.domain.Aggregation;
+import com.dotcms.content.index.domain.DotSearchException;
import com.dotcms.enterprise.publishing.sitesearch.SiteSearchConfig;
import com.dotcms.enterprise.publishing.sitesearch.SiteSearchPublishStatus;
import com.dotcms.enterprise.publishing.sitesearch.SiteSearchResult;
@@ -36,7 +36,7 @@ public interface SiteSearchAPI {
void deactivateIndex(String indexName) throws DotDataException, IOException;
- boolean createSiteSearchIndex(String indexName, String alias, int shards) throws ElasticsearchException, IOException;
+ boolean createSiteSearchIndex(String indexName, String alias, int shards) throws DotSearchException, IOException;
boolean setAlias(String indexName, final String alias);
diff --git a/dotCMS/src/main/java/com/dotmarketing/sitesearch/viewtool/SiteSearchWebAPI.java b/dotCMS/src/main/java/com/dotmarketing/sitesearch/viewtool/SiteSearchWebAPI.java
index de09cbcff072..ed3bf36bb8dd 100644
--- a/dotCMS/src/main/java/com/dotmarketing/sitesearch/viewtool/SiteSearchWebAPI.java
+++ b/dotCMS/src/main/java/com/dotmarketing/sitesearch/viewtool/SiteSearchWebAPI.java
@@ -1,6 +1,8 @@
package com.dotmarketing.sitesearch.viewtool;
import com.dotcms.content.index.IndexAPI;
+import com.dotcms.content.index.domain.Aggregation;
+import com.dotcms.content.index.domain.AggregationBucket;
import com.dotcms.enterprise.publishing.sitesearch.SiteSearchResults;
import com.dotmarketing.beans.Host;
import com.dotmarketing.business.APILocator;
@@ -13,16 +15,11 @@
import com.dotmarketing.util.StringUtils;
import org.apache.velocity.tools.view.context.ViewContext;
import org.apache.velocity.tools.view.tools.ViewTool;
-import org.elasticsearch.search.aggregations.Aggregation;
-import org.elasticsearch.search.aggregations.bucket.histogram.InternalDateHistogram;
-import org.elasticsearch.search.aggregations.bucket.terms.StringTerms;
-import org.elasticsearch.search.aggregations.bucket.terms.StringTerms.Bucket;
import javax.servlet.http.HttpServletRequest;
import javax.servlet.http.HttpServletResponse;
import java.io.IOException;
import java.util.*;
-import org.joda.time.DateTime;
public class SiteSearchWebAPI implements ViewTool {
@@ -173,15 +170,16 @@ public Map getFacets(final String indexName, final String query)
for (String key : aggregations.keySet()) {
final Aggregation aggregation = aggregations.get(key);
+ final String type = aggregation.getType();
- if (aggregation instanceof InternalDateHistogram) {
+ if (isHistogram(type)) {
internalFacet = new InternalWrapperCountDateHistogramFacet(aggregation.getName(),
- aggregation.getType(), ((InternalDateHistogram) aggregation).getBuckets());
- } else if (aggregation instanceof StringTerms) {
+ type, aggregation.getBuckets());
+ } else if (!aggregation.getBuckets().isEmpty()) {
internalFacet = new InternalWrapperStringTermsFacet(aggregation.getName(),
- aggregation.getType(), ((StringTerms) aggregation).getBuckets());
+ type, aggregation.getBuckets());
} else {
- internalFacet = new Facet(aggregation.getName(), aggregation.getType());
+ internalFacet = new Facet(aggregation.getName(), type);
}
internalFacets.put(key, internalFacet);
}
@@ -189,23 +187,32 @@ public Map getFacets(final String indexName, final String query)
return internalFacets;
}
+    /**
+     * Tells whether a vendor aggregation type denotes a histogram (date or numeric): the type
+     * name contains {@code "histogram"} (e.g. {@code date_histogram}). Buckets of such an
+     * aggregation carry numeric keys. A {@code null} type is never a histogram.
+     *
+     * @param type the vendor-reported aggregation type, may be {@code null}
+     * @return {@code true} when the type names a histogram aggregation
+     */
+    private static boolean isHistogram(final String type) {
+        if (type == null) {
+            return false;
+        }
+        return type.indexOf("histogram") >= 0;
+    }
+
/**
* Internal wrapper class for backwards compatibility with the new Elastic Search in Site
* Search.
*
- * @deprecated use ES Aggregations instead
+ * @deprecated use the vendor-neutral {@link #getAggregations(String, String)} instead
*/
public class InternalWrapperCountDateHistogramFacet extends Facet {
private final List entries;
public InternalWrapperCountDateHistogramFacet(final String name, final String type,
- List entries) {
+ List entries) {
super(name, type);
this.entries = new ArrayList<>();
- for (final InternalDateHistogram.Bucket entry : entries) {
- this.entries.add(new CountEntry(((DateTime) entry.getKey()).getMillis(),
- entry.getDocCount()));
+ for (final AggregationBucket entry : entries) {
+ final Number key = entry.getKeyAsNumber();
+ final long time = key != null ? key.longValue() : 0L;
+ this.entries.add(new CountEntry(time, entry.getDocCount()));
}
}
@@ -237,20 +244,20 @@ public long getCount() {
* Internal wrapper class for backwards compatibility with the new Elastic Search in Site
* Search.
*
- * @deprecated use ES Aggregations instead
+ * @deprecated use the vendor-neutral {@link #getAggregations(String, String)} instead
*/
public class InternalWrapperStringTermsFacet extends Facet {
private List entries;
- public InternalWrapperStringTermsFacet(final String name, final String type, final List entries) {
+ public InternalWrapperStringTermsFacet(final String name, final String type, final List entries) {
super(name, type);
this.entries = new ArrayList<>();
- for (final Bucket entry : entries) {
+ for (final AggregationBucket entry : entries) {
this.entries
- .add(new InternalTermEntry(entry.getKey().toString(), entry.getDocCount()));
+ .add(new InternalTermEntry(entry.getKey(), entry.getDocCount()));
}
}
@@ -279,7 +286,7 @@ public long getCount() {
}
/**
- * @deprecated use ES Aggregations instead
+ * @deprecated use the vendor-neutral {@link #getAggregations(String, String)} instead
*/
public class Facet {
From b0646854130ce26fa2cb7823bbd281da08491f6b Mon Sep 17 00:00:00 2001
From: fabrizzio-dotCMS
Date: Tue, 23 Jun 2026 12:29:53 -0600
Subject: [PATCH 2/6] feat(sitesearch): OpenSearch impl + phase-aware router
for Site Search (#35786)
Completes the vendor-neutral Site Search extraction begun in #35786 by adding
the OpenSearch implementation and a phase-aware router, so Site Search dual-writes
and reads correctly across the ES -> OS migration phases.
- OSSiteSearchAPI: @ApplicationScoped @Default OpenSearch implementation of
SiteSearchAPI. Search/aggregations via the generic client -> ContentSearchResponse
(mirrors OSSearchAPIImpl); doc put/delete via _doc PUT/DELETE; get via typed
client.get(...). Default site-search index resolved from VersionedIndicesAPI
(not the deprecated IndiciesAPI). Index names handled in logical space; the
.os tag forced by VersionedIndicesAPI is stripped on read.
- SiteSearchAPIImpl: PhaseRouter router mirroring IndexAPIImpl and
acting as the single fan-out point. Reads -> read provider; doc/index writes ->
write fan-out; listIndices/listClosedIndices merge in dual-write; Quartz task
methods route to a single provider (fan-out would double-schedule jobs).
- ESSiteSearchAPI: use raw ESIndexAPI instead of the IndexAPI router so the
SiteSearch router is the only fan-out point (avoids double dual-write).
- APILocator: SITESEARCH_API now returns SiteSearchAPIImpl.
- OSSiteSearchAPIIntegrationTest: lifecycle, doc round-trip, aggregations, and
default-index activation; registered in OpenSearchUpgradeSuite.
Co-Authored-By: Claude Opus 4.8 (1M context)
---
.../sitesearch/ESSiteSearchAPI.java | 6 +-
.../sitesearch/OSSiteSearchAPI.java | 824 ++++++++++++++++++
.../sitesearch/SiteSearchAPIImpl.java | 288 ++++++
.../com/dotmarketing/business/APILocator.java | 3 +-
.../com/dotcms/OpenSearchUpgradeSuite.java | 4 +-
.../OSSiteSearchAPIIntegrationTest.java | 300 +++++++
6 files changed, 1422 insertions(+), 3 deletions(-)
create mode 100644 dotCMS/src/enterprise/java/com/dotcms/enterprise/publishing/sitesearch/OSSiteSearchAPI.java
create mode 100644 dotCMS/src/enterprise/java/com/dotcms/enterprise/publishing/sitesearch/SiteSearchAPIImpl.java
create mode 100644 dotcms-integration/src/test/java/com/dotcms/content/index/opensearch/OSSiteSearchAPIIntegrationTest.java
diff --git a/dotCMS/src/enterprise/java/com/dotcms/enterprise/publishing/sitesearch/ESSiteSearchAPI.java b/dotCMS/src/enterprise/java/com/dotcms/enterprise/publishing/sitesearch/ESSiteSearchAPI.java
index b8b0dc3cd8ce..3197bd4b0735 100644
--- a/dotCMS/src/enterprise/java/com/dotcms/enterprise/publishing/sitesearch/ESSiteSearchAPI.java
+++ b/dotCMS/src/enterprise/java/com/dotcms/enterprise/publishing/sitesearch/ESSiteSearchAPI.java
@@ -89,7 +89,11 @@ public ESSiteSearchAPI(final IndexAPI indexApi,
}
public ESSiteSearchAPI() {
- this(APILocator.getESIndexAPI(), new ESMappingAPIImpl(), APILocator.getIndiciesAPI());
+ // Use the vendor-specific ESIndexAPI directly (NOT APILocator.getESIndexAPI(), which returns
+ // the phase-aware IndexAPIImpl router). The SiteSearchAPIImpl router is the single fan-out
+ // point for the ES → OS migration; routing index ops through the neutral router here as well
+ // would dual-write a second time and create duplicate OpenSearch indices.
+ this(new ESIndexAPI(), new ESMappingAPIImpl(), APILocator.getIndiciesAPI());
}
/**
diff --git a/dotCMS/src/enterprise/java/com/dotcms/enterprise/publishing/sitesearch/OSSiteSearchAPI.java b/dotCMS/src/enterprise/java/com/dotcms/enterprise/publishing/sitesearch/OSSiteSearchAPI.java
new file mode 100644
index 000000000000..c1d226d23460
--- /dev/null
+++ b/dotCMS/src/enterprise/java/com/dotcms/enterprise/publishing/sitesearch/OSSiteSearchAPI.java
@@ -0,0 +1,824 @@
+/*
+*
+* Copyright (c) 2025 dotCMS LLC
+* Use of this software is governed by the Business Source License included
+* in the LICENSE file found at in the root directory of software.
+* SPDX-License-Identifier: BUSL-1.1
+*
+*/
+
+package com.dotcms.enterprise.publishing.sitesearch;
+
+import com.dotcms.cdi.CDIUtils;
+import com.dotcms.content.elasticsearch.business.ContentletIndexAPIImpl;
+import com.dotcms.content.elasticsearch.business.ESMappingAPIImpl;
+import com.dotcms.content.elasticsearch.business.IndexType;
+import com.dotcms.content.index.IndexAPI;
+import com.dotcms.content.index.IndexTag;
+import com.dotcms.content.index.VersionedIndices;
+import com.dotcms.content.index.VersionedIndicesAPI;
+import com.dotcms.content.index.VersionedIndicesImpl;
+import com.dotcms.content.index.domain.Aggregation;
+import com.dotcms.content.index.domain.ContentSearchResponse;
+import com.dotcms.content.index.domain.DotSearchException;
+import com.dotcms.content.index.domain.SearchHit;
+import com.dotcms.content.index.domain.SearchHits;
+import com.dotcms.content.index.opensearch.MappingOperationsOS;
+import com.dotcms.content.index.opensearch.OSClientProvider;
+import com.dotcms.content.index.opensearch.OSIndexAPIImpl;
+import com.dotcms.enterprise.LicenseUtil;
+import com.dotcms.enterprise.license.LicenseLevel;
+import com.dotcms.publishing.job.SiteSearchJobProxy;
+import com.dotmarketing.business.APILocator;
+import com.dotmarketing.exception.DotDataException;
+import com.dotmarketing.exception.DotRuntimeException;
+import com.dotmarketing.quartz.CronScheduledTask;
+import com.dotmarketing.quartz.QuartzUtils;
+import com.dotmarketing.quartz.ScheduledTask;
+import com.dotmarketing.quartz.TaskRuntimeValues;
+import com.dotmarketing.sitesearch.business.SiteSearchAPI;
+import com.dotmarketing.util.Logger;
+import com.dotmarketing.util.StringUtils;
+import com.dotmarketing.util.UUIDGenerator;
+import com.dotmarketing.util.UtilMethods;
+import com.dotmarketing.util.json.JSONArray;
+import com.dotmarketing.util.json.JSONException;
+import com.dotmarketing.util.json.JSONObject;
+import com.google.common.annotations.VisibleForTesting;
+import io.vavr.control.Try;
+import java.io.File;
+import java.io.IOException;
+import java.io.InputStream;
+import java.net.URL;
+import java.text.ParseException;
+import java.text.SimpleDateFormat;
+import java.time.Duration;
+import java.time.Instant;
+import java.util.ArrayList;
+import java.util.Calendar;
+import java.util.Collections;
+import java.util.Date;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+import java.util.Optional;
+import java.util.stream.Collectors;
+import javax.enterprise.context.ApplicationScoped;
+import javax.enterprise.inject.Default;
+import javax.inject.Inject;
+import org.opensearch.client.json.JsonpDeserializer;
+import org.opensearch.client.json.JsonpMapper;
+import org.opensearch.client.opensearch.OpenSearchClient;
+import org.opensearch.client.opensearch.core.GetResponse;
+import org.opensearch.client.opensearch.core.SearchResponse;
+import org.opensearch.client.opensearch.generic.Bodies;
+import org.opensearch.client.opensearch.generic.Body;
+import org.opensearch.client.opensearch.generic.Requests;
+import org.opensearch.client.opensearch.generic.Response;
+import org.quartz.SchedulerException;
+
+/**
+ * OpenSearch implementation of {@link SiteSearchAPI}.
+ *
+ * <p>Vendor-specific counterpart to {@link ESSiteSearchAPI}. The two implementations are kept
+ * functionally symmetric and are selected at runtime by the {@link SiteSearchAPIImpl} router based
+ * on the migration phase. This class confines every {@code org.opensearch.*} type to its private
+ * helpers — the {@link SiteSearchAPI} contract it implements is vendor-neutral.
+ *
+ * <h3>Index source of truth</h3>
+ * <p>Where {@link ESSiteSearchAPI} reads the active site-search index name from the legacy
+ * {@code IndiciesAPI}, this class uses {@link VersionedIndicesAPI} — the canonical OpenSearch index
+ * registry — via the {@code siteSearch} slot of the default ({@link VersionedIndices#OPENSEARCH_3X})
+ * versioned indices. Index lifecycle operations (create/list/delete/alias) are delegated to
+ * the OpenSearch {@link IndexAPI} provider ({@link OSIndexAPIImpl}) directly rather than the neutral
+ * router, because the {@link SiteSearchAPIImpl} router is already the single phase-aware fan-out point
+ * — routing through the neutral {@code IndexAPI} router here would dual-write a second time.
+ *
+ * <h3>Index naming</h3>
+ * <p>Site-search index names are handled as plain logical names (e.g. {@code sitesearch_1718000000000}),
+ * exactly as in {@link ESSiteSearchAPI}: the cluster-id prefix is added only when a name reaches the
+ * OpenSearch client (via {@link IndexAPI#getNameWithClusterIDPrefix(String)}). The {@code .os}
+ * {@link com.dotcms.content.index.IndexTag} is intentionally not applied to site-search indices —
+ * production ES and OS run on separate clusters, and the site-search pointer lives in its own
+ * {@code siteSearch} slot, so there is no shared-name collision to disambiguate.
+ * TODO OS: revisit if single-cluster dual-write of site-search is ever required (then tag with
+ * {@code IndexTag.OS}).
+ *
+ * @author Fabrizio Araya
+ * @see ESSiteSearchAPI
+ * @see SiteSearchAPIImpl
+ * @see com.dotcms.content.index.opensearch.OSSearchAPIImpl
+ */
+@ApplicationScoped
+@Default
+public class OSSiteSearchAPI implements SiteSearchAPI {
+
+    /**
+     * Response deserializer with {@code TDocument} bound to {@code Object} (JSON objects become
+     * {@code Map}). The query body is sent through the low-level (generic) client so nested
+     * sub-aggregations are preserved; the bare {@code SearchResponse._DESERIALIZER} has no document
+     * deserializer bound and would fail on a hit carrying a {@code _source}. Mirrors
+     * {@link com.dotcms.content.index.opensearch.OSSearchAPIImpl}.
+     */
+    private static final JsonpDeserializer<SearchResponse<Object>> SEARCH_RESPONSE_DESERIALIZER =
+            SearchResponse.createSearchResponseDeserializer(JsonpDeserializer.of(Object.class));
+
+ private final OSClientProvider clientProvider;
+ private final IndexAPI indexApi;
+ private final MappingOperationsOS mappingOperations;
+
+ /**
+ * CDI-managed constructor. Resolves the {@link OSClientProvider}, {@link OSIndexAPIImpl} and
+ * {@link MappingOperationsOS} beans through {@code CDIUtils.getBeanThrows} and hands them to
+ * the three-argument constructor; the concrete {@link OSIndexAPIImpl} is what ends up stored
+ * as this class's {@link IndexAPI}.
+ */
+ @Inject
+ public OSSiteSearchAPI() {
+ this(CDIUtils.getBeanThrows(OSClientProvider.class),
+ CDIUtils.getBeanThrows(OSIndexAPIImpl.class),
+ CDIUtils.getBeanThrows(MappingOperationsOS.class));
+ }
+
+ /**
+ * Package-private constructor for testing. Stores the supplied collaborators as-is, with no
+ * validation or lookup.
+ *
+ * @param clientProvider    provider of the OpenSearch client
+ * @param indexApi          index API used for index-level operations (listing, closed indices)
+ * @param mappingOperations OpenSearch mapping operations
+ */
+ @VisibleForTesting
+ OSSiteSearchAPI(final OSClientProvider clientProvider,
+ final IndexAPI indexApi,
+ final MappingOperationsOS mappingOperations) {
+ this.clientProvider = clientProvider;
+ this.indexApi = indexApi;
+ this.mappingOperations = mappingOperations;
+ }
+
+ // =========================================================================
+ // Index listing
+ // =========================================================================
+
+    /**
+     * Lists the site-search indices reported by the index provider in descending lexical order,
+     * with the active (default) site-search index moved to the head of the list.
+     *
+     * @return the site-search index names; an empty list when the license level is below
+     *         {@code STANDARD}
+     */
+    @Override
+    public List<String> listIndices() {
+        if (LicenseUtil.getLevel() < LicenseLevel.STANDARD.level) {
+            return Collections.emptyList();
+        }
+        final List<String> indices = indexApi.listIndices().stream()
+                .filter(IndexType.SITE_SEARCH::is)
+                .collect(Collectors.toList());
+
+        // One descending sort gives the same order as sorting ascending and then reversing.
+        indices.sort(Collections.reverseOrder());
+        setDefaultToSpecificPosition(indices, 0);
+        return indices;
+    }
+
+    /**
+     * Moves the active (default) site-search index to {@code indexPosition} of the list, mirroring
+     * {@link ESSiteSearchAPI} but resolving the default from {@link VersionedIndicesAPI}.
+     *
+     * <p>Nothing happens when the list is {@code null} or has fewer than two entries, or when no
+     * default index is set. A default index missing from the list is only logged as a warning.
+     *
+     * @param list          the mutable list of index names to reorder in place
+     * @param indexPosition the position the default index should be moved to
+     */
+    private void setDefaultToSpecificPosition(final List<String> list, final int indexPosition) {
+        if (list == null || list.size() <= 1) {
+            return;
+        }
+        final String defaultIndice = defaultSiteSearchIndex().orElse(null);
+        if (!UtilMethods.isSet(defaultIndice)) {
+            return;
+        }
+        final int index = list.indexOf(defaultIndice);
+        if (index < 0) {
+            Logger.warn(this.getClass(), String.format(
+                    "The default site search '%s' index was not found in the list of indices.",
+                    defaultIndice));
+            return;
+        }
+        list.remove(index);
+        list.add(indexPosition, defaultIndice);
+    }
+
+    /**
+     * Lists the closed site-search indices reported by the index provider, in descending lexical
+     * order.
+     *
+     * @return a mutable list of closed site-search index names; empty when the license level is
+     *         below {@code STANDARD}
+     */
+    @Override
+    public List<String> listClosedIndices() {
+        final List<String> indices = new ArrayList<>();
+        if (LicenseUtil.getLevel() < LicenseLevel.STANDARD.level) {
+            return indices;
+        }
+        for (final String indexName : indexApi.getClosedIndexes()) {
+            if (IndexType.SITE_SEARCH.is(indexName)) {
+                indices.add(indexName);
+            }
+        }
+        // One descending sort gives the same order as sorting ascending and then reversing.
+        indices.sort(Collections.reverseOrder());
+        return indices;
+    }
+
+ // =========================================================================
+ // Search & aggregations
+ // =========================================================================
+
+    /**
+     * Searches the default site-search index.
+     *
+     * @param query the query string or JSON query; {@code null} yields a result whose error is
+     *              {@code "null query"}
+     * @param start offset of the first hit
+     * @param rows  maximum number of hits
+     * @return the search results; an empty result when the license level is below
+     *         {@code STANDARD}, or a result carrying the error message when the search fails
+     */
+    @Override
+    public SiteSearchResults search(final String query, final int start, final int rows) {
+        final SiteSearchResults results = new SiteSearchResults();
+        if (query == null) {
+            results.setError("null query");
+            return results;
+        }
+        if (LicenseUtil.getLevel() < LicenseLevel.STANDARD.level) {
+            return results;
+        }
+        try {
+            return search(defaultSiteSearchIndex().orElse(null), query, start, rows);
+        } catch (final Exception e) {
+            // Log the failure (as the four-argument overload does) instead of only surfacing its
+            // message, so the stack trace is not lost.
+            Logger.error(OSSiteSearchAPI.class, e.getMessage(), e);
+            results.setError(e.getMessage());
+            return results;
+        }
+    }
+
+    /**
+     * Searches a site-search index.
+     *
+     * <p>A blank query becomes {@code "*"}. A query that is not JSON is wrapped in a
+     * {@code query_string} body with {@code size}/{@code from} (when positive) and a highlight
+     * request on {@code content}; a JSON query is sent as the request body unchanged apart from
+     * the slash escaping. Failures are logged and reported through
+     * {@link SiteSearchResults#setError(String)} rather than thrown.
+     *
+     * @param indexName the logical site-search index name; {@code null} selects the default index
+     * @param query     the query string or JSON query
+     * @param offset    offset of the first hit
+     * @param limit     maximum number of hits
+     * @return the search results; an empty result when the license level is below {@code STANDARD}
+     */
+    @Override
+    public SiteSearchResults search(String indexName, String query, final int offset, final int limit) {
+        if (!UtilMethods.isSet(query)) {
+            query = "*";
+        }
+        final SiteSearchResults results = new SiteSearchResults();
+
+        if (LicenseUtil.getLevel() < LicenseLevel.STANDARD.level) {
+            return results;
+        }
+
+        // JSON detection runs on the query as received, before the slash escaping below.
+        final boolean isJson = StringUtils.isJson(query);
+
+        //https://github.com/elasticsearch/elasticsearch/issues/2980
+        if (query.contains("/")) {
+            query = query.replaceAll("/", "\\\\/");
+        }
+
+        results.setQuery(query);
+        results.setLimit(limit);
+        results.setOffset(offset);
+
+        try {
+            if (indexName == null) {
+                indexName = defaultSiteSearchIndex().orElse(null);
+            }
+            // Explicit null guard (as in getAggregations/getFacets): without a default index the
+            // caller gets the descriptive message instead of whatever a null lookup produces.
+            if (indexName == null || !IndexType.SITE_SEARCH.is(indexName)) {
+                throw new DotSearchException(indexName + " is not a sitesearch index");
+            }
+            results.setIndex(indexName);
+
+            final JSONObject body;
+            if (!isJson) {
+                body = new JSONObject();
+                body.put("query", new JSONObject().put("query_string",
+                        new JSONObject().put("query", query).put("default_field", "*")));
+                if (limit > 0) {
+                    body.put("size", limit);
+                }
+                if (offset > 0) {
+                    body.put("from", offset);
+                }
+                body.put("highlight", new JSONObject().put("fields",
+                        new JSONObject().put("content", new JSONObject().put("fragment_size", 255))));
+            } else {
+                body = new JSONObject(query);
+            }
+
+            final ContentSearchResponse response = rawSearch(physicalName(indexName), body);
+            results.setTook(response.tookMillis() + "ms");
+            if (!isJson) {
+                // Report the request body that was actually sent for a plain-text query.
+                results.setQuery(body.toString());
+            }
+
+            final SearchHits hits = response.hits();
+            results.setTotalResults(hits.getTotalHits().value());
+
+            float maxScore = 0f;
+            for (final SearchHit hit : hits) {
+                final SiteSearchResult ssr = new SiteSearchResult(new HashMap<>(hit.getSourceAsMap()));
+                ssr.setScore(hit.getScore());
+                maxScore = Math.max(maxScore, hit.getScore());
+                // TODO OS: the neutral SearchHit DTO does not carry per-field highlights yet.
+                // Site-search highlights are a best-effort extra (the ES path also swallows
+                // highlight failures); set empty until the neutral hit exposes highlight fragments.
+                ssr.setHighLight(new String[0]);
+                results.getResults().add(ssr);
+            }
+            results.setMaxScore(maxScore);
+
+        } catch (final Exception e) {
+            Logger.error(OSSiteSearchAPI.class, e.getMessage(), e);
+            results.setError(e.getMessage());
+        }
+
+        return results;
+    }
+
+ /**
+ * Runs a JSON search body against a site-search index and returns the aggregation tree of the
+ * response.
+ *
+ * @param indexName index name or alias; it is first passed through {@code resolveIndexOrAlias}
+ * @param query JSON search body; every {@code /} in it is backslash-escaped before it is parsed
+ * @return the value of {@code aggregationTree()} on the search response
+ * @throws DotSearchException if the resolved name is {@code null} or is not a site-search index,
+ * or if parsing or executing the query fails (the original exception is kept as the cause)
+ */
+ @Override
+ public Map getAggregations(String indexName, String query)
+ throws DotDataException {
+ indexName = resolveIndexOrAlias(indexName);
+ if (indexName == null || !IndexType.SITE_SEARCH.is(indexName)) {
+ throw new DotSearchException(indexName + " is not a sitesearch index or alias");
+ }
+
+ //https://github.com/elasticsearch/elasticsearch/issues/2980
+ // The replacement below puts two backslashes in front of each '/'. Inside a JSON string value
+ // the pair is parsed back to a single backslash, so the slash reaches the backend escaped.
+ if (query.contains("/")) {
+ query = query.replaceAll("/", "\\\\\\\\/");
+ }
+
+ try {
+ final ContentSearchResponse response = rawSearch(physicalName(indexName), new JSONObject(query));
+ return response.aggregationTree();
+ } catch (final Exception e) {
+ // Log here, then rethrow with the original exception attached as the cause.
+ Logger.error(this.getClass(), "Error getting aggregations for query.\n" + e.getMessage(), e);
+ throw new DotSearchException("Error getting aggregations for query.\n" + e.getMessage(), e);
+ }
+ }
+
+ /**
+ * {@inheritDoc}
+ *
+ * <p>This implementation performs the same steps as {@link #getAggregations(String, String)}
+ * (resolve the name, escape {@code /}, run the raw search, return the aggregation tree); only
+ * the error messages differ.
+ *
+ * @throws DotSearchException if the resolved name is {@code null} or is not a site-search index,
+ * or if parsing or executing the query fails (the original exception is kept as the cause)
+ * @deprecated use {@link #getAggregations(String, String)} instead.
+ */
+ @Deprecated
+ @Override
+ public Map getFacets(String indexName, String query) throws DotDataException {
+ indexName = resolveIndexOrAlias(indexName);
+ if (indexName == null || !IndexType.SITE_SEARCH.is(indexName)) {
+ throw new DotSearchException(indexName + " is not a sitesearch index or alias");
+ }
+
+ //https://github.com/elasticsearch/elasticsearch/issues/2980
+ // Two backslashes are inserted before each '/'; JSON string parsing reduces them to one.
+ if (query.contains("/")) {
+ query = query.replaceAll("/", "\\\\\\\\/");
+ }
+
+ try {
+ final ContentSearchResponse response = rawSearch(physicalName(indexName), new JSONObject(query));
+ return response.aggregationTree();
+ } catch (final Exception e) {
+ // Log here, then rethrow with the original exception attached as the cause.
+ Logger.error(this.getClass(), "Error getting Facets for query.\n" + e.getMessage(), e);
+ throw new DotSearchException("Error getting Facets for query.\n" + e.getMessage(), e);
+ }
+ }
+
+ // =========================================================================
+ // Default index activation / inspection
+ // =========================================================================
+
+ /**
+ * Tells whether {@code indexName} is the current default site-search index.
+ *
+ * @param indexName name to compare; {@code null} is never reported as the default
+ * @return {@code true} only when {@code indexName} is non-null and equals the value held by
+ * {@code defaultSiteSearchIndex()}
+ */
+ @Override
+ public boolean isDefaultIndex(final String indexName) throws DotDataException {
+ if (indexName == null) {
+ return false;
+ }
+ final String currentDefault = defaultSiteSearchIndex().orElse(null);
+ return indexName.equals(currentDefault);
+ }
+
+ /**
+ * Records {@code indexName} as the default site-search index.
+ *
+ * <p>Silently does nothing when the license level is below STANDARD or when {@code indexName}
+ * is not recognised as a site-search index name.
+ *
+ * @param indexName site-search index to make the default
+ */
+ @Override
+ public void activateIndex(final String indexName) throws DotDataException {
+ final boolean licensed = LicenseUtil.getLevel() >= LicenseLevel.STANDARD.level;
+ if (!licensed || !IndexType.SITE_SEARCH.is(indexName)) {
+ return;
+ }
+ // Start from a copy of the current default indices and overwrite only the site-search slot.
+ final VersionedIndicesImpl.Builder updated = copyDefaultIndices();
+ updated.siteSearch(indexName);
+ saveDefaultIndices(updated);
+ }
+
+ /**
+ * Clears the default site-search index pointer.
+ *
+ * <p>Silently does nothing when the license level is below STANDARD or when {@code indexName}
+ * is not recognised as a site-search index name. {@code indexName} is used only for that check;
+ * the replacement set of default indices comes from {@code copyDefaultIndicesExceptSiteSearch()}.
+ *
+ * @param indexName site-search index name being deactivated
+ * @throws DotDataException declared by the signature; presumably raised by the versioned-indices
+ * calls — confirm
+ * @throws IOException declared by the signature; no call visible in this method obviously
+ * raises it — confirm whether it is still needed
+ */
+ @Override
+ public void deactivateIndex(final String indexName) throws DotDataException, IOException {
+ if (LicenseUtil.getLevel() < LicenseLevel.STANDARD.level) {
+ return;
+ }
+ if (!IndexType.SITE_SEARCH.is(indexName)) {
+ return;
+ }
+ // Rebuild the default indices without the site-search slot. saveIndices() does a
+ // delete-by-version then re-insert, so omitting the slot clears the pointer while preserving
+ // the content live/working rows. If site-search was the ONLY slot for this version, the
+ // rebuilt info would be empty (saveIndices rejects empty), so drop the version row instead.
+ final VersionedIndicesImpl rebuilt = copyDefaultIndicesExceptSiteSearch().build();
+ final VersionedIndicesAPI api = APILocator.getVersionedIndicesAPI();
+ if (rebuilt.hasAnyIndex()) {
+ api.saveIndices(rebuilt);
+ } else {
+ api.removeVersion(rebuilt.version());
+ }
+ // Clear the cache in both branches so later reads do not see the old pointer.
+ api.clearCache();
+ }
+
+ // =========================================================================
+ // Index creation / mapping
+ // =========================================================================
+
+ /**
+ * Creates a site-search index, optionally aliases it, and applies the site-search mapping.
+ *
+ * <p>The index name is lower-cased before use. Settings and mapping are JSON resources looked up
+ * through the thread context class loader.
+ *
+ * <p>NOTE(review): both resources are read via {@code new File(url.getPath())} with no null
+ * check on the URL returned by {@code getResource}, and the bytes are decoded with the platform
+ * default charset. Presumably this relies on the resources being plain files on disk — confirm
+ * that this holds for every deployment layout.
+ *
+ * @param indexName name of the index to create; {@code null} makes the method return {@code false}
+ * @param alias alias to create for the index; skipped when {@code UtilMethods.isSet(alias)} is false
+ * @param shards passed to {@code indexApi.createIndex} as its third argument
+ * @return {@code false} when {@code indexName} is {@code null} or the license level is below
+ * STANDARD; {@code true} after the index, optional alias and mapping steps have completed
+ * @throws DotSearchException if index creation or the mapping step fails
+ * @throws IOException declared by the signature; presumably from reading the resources — confirm
+ */
+ @Override
+ public synchronized boolean createSiteSearchIndex(String indexName, final String alias, final int shards)
+ throws DotSearchException, IOException {
+ if (indexName == null) {
+ return false;
+ }
+ if (LicenseUtil.getLevel() < LicenseLevel.STANDARD.level) {
+ return false;
+ }
+
+ indexName = indexName.toLowerCase();
+ final ClassLoader classLoader = Thread.currentThread().getContextClassLoader();
+ URL url = classLoader.getResource("es-sitesearch-settings.json");
+ final String settings = new String(com.liferay.util.FileUtil.getBytes(new File(url.getPath())));
+ url = classLoader.getResource("es-sitesearch-mapping.json");
+ final String mapping = new String(com.liferay.util.FileUtil.getBytes(new File(url.getPath())));
+
+ try {
+ indexApi.createIndex(indexName, settings, shards);
+ } catch (final Exception e) {
+ throw new DotSearchException("Error creating OpenSearch site search index: " + e.getMessage(), e);
+ }
+
+ if (UtilMethods.isSet(alias)) {
+ indexApi.createAlias(indexName, alias);
+ }
+
+ try {
+ mappingOperations.putMapping(List.of(indexName), mapping);
+ } catch (final IOException e) {
+ throw new DotSearchException("Error applying mapping to OpenSearch site search index: "
+ + e.getMessage(), e);
+ }
+
+ return true;
+ }
+
+ /**
+ * Points {@code alias} at the given site-search index.
+ *
+ * <p>The index name is lower-cased before the alias request is issued, matching what
+ * {@code createSiteSearchIndex} does with the name it creates.
+ *
+ * @param indexName index the alias should point to; must be set
+ * @param alias alias name; must be set
+ * @return {@code true} once the alias request has been issued; {@code false} when the license
+ * level is below STANDARD and nothing was done
+ * @throws IllegalArgumentException if {@code indexName} or {@code alias} is not set
+ */
+ @Override
+ public synchronized boolean setAlias(String indexName, final String alias) {
+ if (LicenseUtil.getLevel() < LicenseLevel.STANDARD.level) {
+ return false;
+ }
+ if (UtilMethods.isNotSet(indexName) || UtilMethods.isNotSet(alias)) {
+ throw new IllegalArgumentException(String.format(
+ " either one or both params aren't set. index: `%s`, alias: `%s` ", indexName, alias));
+ }
+ indexName = indexName.toLowerCase();
+ indexApi.createAlias(indexName, alias);
+ // Report success: returning false here made a created alias indistinguishable from the
+ // unlicensed no-op above.
+ return true;
+ }
+
+ /**
+ * Mirrors {@link ESSiteSearchAPI#deleteOldSiteSearchIndices()} but resolves the active index from
+ * {@link VersionedIndicesAPI} and deletes through the OpenSearch {@link IndexAPI} provider.
+ *
+ * <p>Starting from {@code listIndices()}, the following are kept: the default site-search index,
+ * every index returned as a key by {@code indexApi.getIndexAlias(...)}, and every index whose
+ * name carries a timestamp (the segment after the first {@code _}) that is not older than one
+ * day. Everything left over is deleted in a single {@code deleteMultiple} call.
+ *
+ * <p>NOTE(review): an index whose timestamp segment cannot be parsed is only logged; it is not
+ * added to the "recent" list and therefore stays in the deletion set. Confirm that this matches
+ * the intended behaviour.
+ */
+ @Override
+ public void deleteOldSiteSearchIndices() {
+ final List<String> indicesToRemove = new ArrayList<>(listIndices());
+
+ // Keep the default (active) site-search index.
+ defaultSiteSearchIndex().ifPresent(indicesToRemove::remove);
+
+ // Keep any index that backs an alias.
+ final List<String> indicesWithAlias =
+ new ArrayList<>(indexApi.getIndexAlias(indicesToRemove).keySet());
+ indicesToRemove.removeAll(indicesWithAlias);
+
+ // Keep indices created within the last 24 hours.
+ final Date yesterday = Date.from(Instant.now().minus(Duration.ofDays(1)));
+ final long yesterdayTimestamp =
+ Long.parseLong(ContentletIndexAPIImpl.timestampFormatter.format(yesterday));
+
+ final List<String> recent = new ArrayList<>();
+ for (final String index : indicesToRemove) {
+ try {
+ // Index names are expected to look like <prefix>_<timestamp>.
+ final long indexTimestamp = Long.parseLong(index.split("_")[1]);
+ if (indexTimestamp >= yesterdayTimestamp) {
+ recent.add(index);
+ }
+ } catch (final RuntimeException e) {
+ // Covers both a missing '_' segment and a non-numeric one.
+ Logger.warn(this.getClass(),
+ "Unable to parse timestamp from site search index '" + index + "': " + e.getMessage());
+ }
+ }
+ indicesToRemove.removeAll(recent);
+
+ if (!indicesToRemove.isEmpty()) {
+ Logger.info(this.getClass(),
+ "The following indices will be deleted: " + String.join(",", indicesToRemove));
+ indexApi.deleteMultiple(indicesToRemove.toArray(new String[0]));
+ }
+ }
+
+ // =========================================================================
+ // Document operations
+ // =========================================================================
+
+ /**
+ * Normalises a single site-search result and writes it to the index as a document.
+ *
+ * <p>Best-effort: nothing is written when the license level is below STANDARD or when the
+ * result's content length is 0, and any exception is logged rather than propagated.
+ *
+ * @param idx logical index name; converted with {@code physicalName} to build the endpoint
+ * @param res result to index; its title, content, description and keywords may be modified here
+ * @param resultType only used in the log line written before the request
+ */
+ @Override
+ public void putToIndex(final String idx, final SiteSearchResult res, final String resultType) {
+ if (LicenseUtil.getLevel() < LicenseLevel.STANDARD.level) {
+ return;
+ }
+ try {
+ if (res.getContentLength() == 0) {
+ return;
+ }
+ // Fall back to the file name when there is no title.
+ if (res.getTitle() == null && res.getFileName() != null) {
+ res.setTitle(res.getFileName());
+ }
+
+ // Strip HTML out of text content.
+ // The untouched content is kept in the map under "content_raw".
+ if (res.getContent() != null && UtilMethods.isSet(res.getMimeType())
+ && res.getMimeType().contains("text/")) {
+ res.getMap().put("content_raw", res.getContent());
+ res.setContent(res.getContent().replaceAll("\\<.*?\\>", ""));
+ }
+
+ // Without an explicit description, derive one from the (already stripped) content.
+ String desc = res.getDescription();
+ if (!UtilMethods.isSet(res.getDescription()) && UtilMethods.isSet(res.getContent())) {
+ desc = UtilMethods.prettyShortenString(res.getContent(), 500);
+ }
+ res.setDescription(desc);
+
+ // "seokeywords" is used only when BOTH keys are present; otherwise "keywords" is used,
+ // which yields null when that key is absent.
+ if (res.getMap().containsKey("keywords") && res.getMap().containsKey("seokeywords")) {
+ res.setKeywords((String) res.getMap().get("seokeywords"));
+ } else {
+ res.setKeywords((String) res.getMap().get("keywords"));
+ }
+
+ Logger.info(this.getClass(),
+ "writing from : " + idx + " type: " + resultType + " url:" + res.getUrl());
+ final String json = new ESMappingAPIImpl().toJsonString(res.getMap());
+
+ // PUT /<physical index>/_doc/<id>?refresh=true with the serialized map as the JSON body.
+ final String endpoint = "/" + physicalName(idx) + "/_doc/" + res.getId();
+ try (final Response response = clientProvider.getClient().generic()
+ .execute(Requests.builder()
+ .method("PUT")
+ .endpoint(endpoint)
+ .query(Map.of("refresh", "true"))
+ .body(Bodies.json(json))
+ .build())) {
+ final int status = response.getStatus();
+ // A non-2xx status is logged only; it does not raise.
+ if (status < 200 || status >= 300) {
+ Logger.error(this.getClass(), "putToIndex failed for doc " + res.getId()
+ + " — HTTP " + status);
+ }
+ }
+ } catch (final Exception e) {
+ // Swallowed on purpose: callers are not notified of indexing failures.
+ Logger.error(OSSiteSearchAPI.class, e.getMessage(), e);
+ }
+ }
+
+ /**
+ * Writes each result of the list to the index by delegating to
+ * {@link #putToIndex(String, SiteSearchResult, String)}, one document at a time.
+ *
+ * <p>Does nothing when the license level is below STANDARD or when {@code res} is {@code null}.
+ *
+ * @param idx logical index name
+ * @param res results to index; may be {@code null} or empty
+ * @param resultType forwarded unchanged to the single-document overload
+ */
+ @Override
+ public void putToIndex(final String idx, final List<SiteSearchResult> res, final String resultType) {
+ if (LicenseUtil.getLevel() < LicenseLevel.STANDARD.level) {
+ return;
+ }
+ // The single-document overload is best-effort, so a missing list is treated as a no-op too.
+ if (res == null) {
+ return;
+ }
+ for (final SiteSearchResult r : res) {
+ putToIndex(idx, r, resultType);
+ }
+ }
+
+ @Override
+ @SuppressWarnings({"unchecked", "rawtypes"})
+ public SiteSearchResult getFromIndex(final String index, final String id) {
+ if (LicenseUtil.getLevel() < LicenseLevel.STANDARD.level) {
+ return null;
+ }
+ try {
+ final String physical = physicalName(index);
+ final GetResponse
+ *
+ * @author Fabrizio Araya
+ */
+@ApplicationScoped
+@RunWith(DataProviderWeldRunner.class)
+public class OSSiteSearchAPIIntegrationTest extends IntegrationTestBase {
+
+ /** Short random token that keeps index names and document ids unique per run. */
+ private static final String RUN_ID =
+ UUID.randomUUID().toString().replace("-", "").substring(0, 8);
+
+ /** Numeric suffix so names match the {@code sitesearch_} convention. */
+ private static final String SUFFIX = String.valueOf(Math.abs((long) RUN_ID.hashCode()));
+
+ private static final String IDX_ONE = "sitesearch_" + SUFFIX;
+ /** Second index; its numeric suffix is {@code SUFFIX + 1}, so its name does NOT contain SUFFIX. */
+ private static final String IDX_TWO = "sitesearch_" + (Long.parseLong(SUFFIX) + 1);
+
+ private static final String DOC_ID = "os-ss-it-" + RUN_ID;
+
+ @Inject
+ private OSSiteSearchAPI osSiteSearchAPI;
+
+ @Inject
+ private OSIndexAPIImpl osIndexAPI;
+
+ // =======================================================================
+ // Lifecycle
+ // =======================================================================
+
+ @BeforeClass
+ public static void prepare() throws Exception {
+ IntegrationTestInitService.getInstance().init();
+ LicenseTestUtil.getLicense();
+ }
+
+ @Before
+ public void setUp() {
+ cleanupTestData();
+ }
+
+ @After
+ public void tearDown() {
+ cleanupTestData();
+ }
+
+ // =======================================================================
+ // Section 1 — Core index lifecycle
+ // =======================================================================
+
+ /**
+ * Given scenario: a fresh site-search index name that does not yet exist in OpenSearch.
+ * Expected: createSiteSearchIndex creates it, indexExists reports it, and it shows up in
+ * listIndices.
+ */
+ @Test
+ public void test_createSiteSearchIndex_shouldExistAndBeListed() throws Exception {
+ assertFalse("Pre-condition: index must not exist yet", osIndexAPI.indexExists(IDX_ONE));
+
+ final boolean created = osSiteSearchAPI.createSiteSearchIndex(IDX_ONE, null, 1);
+
+ assertTrue("createSiteSearchIndex must return true", created);
+ assertTrue("Index must exist in OpenSearch after creation", osIndexAPI.indexExists(IDX_ONE));
+ assertTrue("Index must be returned by listIndices",
+ osSiteSearchAPI.listIndices().contains(IDX_ONE));
+
+ Logger.info(this, "✅ test_createSiteSearchIndex_shouldExistAndBeListed passed – index: " + IDX_ONE);
+ }
+
+ /**
+ * Given scenario: an existing site-search index.
+ * Expected: deleting it through the OpenSearch index API removes it from the cluster.
+ */
+ @Test
+ public void test_deleteSiteSearchIndex_shouldRemoveIt() throws Exception {
+ osSiteSearchAPI.createSiteSearchIndex(IDX_ONE, null, 1);
+ assertTrue("Pre-condition: index must exist", osIndexAPI.indexExists(IDX_ONE));
+
+ osIndexAPI.delete(IDX_ONE);
+
+ assertFalse("Index must be gone after deletion", osIndexAPI.indexExists(IDX_ONE));
+ Logger.info(this, "✅ test_deleteSiteSearchIndex_shouldRemoveIt passed");
+ }
+
+ // =======================================================================
+ // Section 2 — Document round-trip (put / get / search / delete)
+ // =======================================================================
+
+ /**
+ * Given scenario: an empty site-search index.
+ * Expected: a document put to the index is retrievable by id, discoverable by search, and gone
+ * after deleteFromIndex.
+ */
+ @Test
+ public void test_putGetSearchDelete_documentRoundTrip() throws Exception {
+ osSiteSearchAPI.createSiteSearchIndex(IDX_ONE, null, 1);
+ assertNull("Pre-condition: document must not exist yet",
+ osSiteSearchAPI.getFromIndex(IDX_ONE, DOC_ID));
+
+ final SiteSearchResult doc = new SiteSearchResult();
+ doc.setId(DOC_ID);
+ doc.setUrl("/os-site-search-it/" + RUN_ID);
+ doc.setTitle("OpenSearch Site Search IT " + RUN_ID);
+ doc.setMimeType("text/html");
+ doc.setContent("dotcms opensearch site search integration roundtrip " + RUN_ID);
+ doc.setContentLength(doc.getContent().length());
+
+ osSiteSearchAPI.putToIndex(IDX_ONE, doc, "content");
+
+ final SiteSearchResult fetched = osSiteSearchAPI.getFromIndex(IDX_ONE, DOC_ID);
+ assertNotNull("Document must be retrievable after put", fetched);
+ assertEquals("Fetched document id must match", DOC_ID, fetched.getId());
+
+ final SiteSearchResults results = osSiteSearchAPI.search(IDX_ONE, "roundtrip", 0, 10);
+ assertNull("Search must not return an error: " + results.getError(), results.getError());
+ assertTrue("Search must find the indexed document", results.getTotalResults() >= 1);
+
+ osSiteSearchAPI.deleteFromIndex(IDX_ONE, DOC_ID);
+ assertNull("Document must be gone after deleteFromIndex",
+ osSiteSearchAPI.getFromIndex(IDX_ONE, DOC_ID));
+
+ Logger.info(this, "✅ test_putGetSearchDelete_documentRoundTrip passed – hits: "
+ + results.getTotalResults());
+ }
+
+ /**
+ * Given scenario: an index holding a few documents that share a common term.
+ * Expected: a terms aggregation query returns a non-null aggregation tree keyed by the
+ * aggregation name.
+ */
+ @Test
+ public void test_getAggregations_shouldReturnBuckets() throws Exception {
+ osSiteSearchAPI.createSiteSearchIndex(IDX_ONE, null, 1);
+
+ for (int i = 0; i < 3; i++) {
+ final SiteSearchResult doc = new SiteSearchResult();
+ doc.setId(DOC_ID + "-" + i);
+ doc.setUrl("/agg/" + RUN_ID + "/" + i);
+ doc.setTitle("Aggregation doc " + i);
+ doc.setMimeType("text/html");
+ doc.setContent("aggregation bucket sample " + RUN_ID);
+ doc.setContentLength(doc.getContent().length());
+ osSiteSearchAPI.putToIndex(IDX_ONE, doc, "content");
+ }
+
+ final String aggQuery = new JSONObject()
+ .put("size", 0)
+ .put("aggs", new JSONObject().put("by_mime",
+ new JSONObject().put("terms",
+ new JSONObject().put("field", "mimeType")))).toString();
+
+ final Map<String, ?> aggregations =
+ osSiteSearchAPI.getAggregations(IDX_ONE, aggQuery);
+
+ assertNotNull("Aggregations map must not be null", aggregations);
+ assertTrue("Aggregation 'by_mime' must be present", aggregations.containsKey("by_mime"));
+
+ Logger.info(this, "✅ test_getAggregations_shouldReturnBuckets passed – keys: "
+ + aggregations.keySet());
+ }
+
+ // =======================================================================
+ // Section 3 — Default index activation (VersionedIndicesAPI)
+ // =======================================================================
+
+ /**
+ * Given scenario: a created site-search index that is not yet the default.
+ * Expected: activateIndex makes isDefaultIndex true and orders it first in listIndices;
+ * deactivateIndex clears the default.
+ */
+ @Test
+ public void test_activateDeactivate_shouldToggleDefault() throws Exception {
+ osSiteSearchAPI.createSiteSearchIndex(IDX_ONE, null, 1);
+ assertFalse("Pre-condition: index must not be default yet",
+ osSiteSearchAPI.isDefaultIndex(IDX_ONE));
+
+ osSiteSearchAPI.activateIndex(IDX_ONE);
+ assertTrue("Index must be the default after activation",
+ osSiteSearchAPI.isDefaultIndex(IDX_ONE));
+
+ osSiteSearchAPI.deactivateIndex(IDX_ONE);
+ assertFalse("Index must no longer be the default after deactivation",
+ osSiteSearchAPI.isDefaultIndex(IDX_ONE));
+
+ Logger.info(this, "✅ test_activateDeactivate_shouldToggleDefault passed");
+ }
+
+ /**
+ * Given scenario: two created site-search indices with the second activated as default.
+ * Expected: listIndices returns both and places the active (default) index first.
+ */
+ @Test
+ public void test_listIndices_shouldPlaceDefaultFirst() throws Exception {
+ osSiteSearchAPI.createSiteSearchIndex(IDX_ONE, null, 1);
+ osSiteSearchAPI.createSiteSearchIndex(IDX_TWO, null, 1);
+
+ osSiteSearchAPI.activateIndex(IDX_TWO);
+
+ final List<String> indices = osSiteSearchAPI.listIndices();
+ assertTrue("Both indices must be listed",
+ indices.contains(IDX_ONE) && indices.contains(IDX_TWO));
+ assertEquals("The default index must be first", IDX_TWO, indices.get(0));
+
+ Logger.info(this, "✅ test_listIndices_shouldPlaceDefaultFirst passed – order: " + indices);
+ }
+
+ // =======================================================================
+ // Section 4 — Additional interface methods
+ // =======================================================================
+
+ /**
+ * Given scenario: no closed site-search indices for this run.
+ * Expected: listClosedIndices returns a non-null list without raising.
+ */
+ @Test
+ public void test_listClosedIndices_shouldNotFail() {
+ final List<String> closed = osSiteSearchAPI.listClosedIndices();
+ assertNotNull("listClosedIndices must never return null", closed);
+ Logger.info(this, "✅ test_listClosedIndices_shouldNotFail passed – count: " + closed.size());
+ }
+
+ // =======================================================================
+ // Cleanup helpers
+ // =======================================================================
+
+ /**
+ * Removes both test indices from OpenSearch (when present) and then the matching versioned
+ * rows. Failures are logged and never fail the test.
+ */
+ private synchronized void cleanupTestData() {
+ for (final String name : List.of(IDX_ONE, IDX_TWO)) {
+ try {
+ if (osIndexAPI.indexExists(name)) {
+ osIndexAPI.delete(name);
+ }
+ } catch (final Exception e) {
+ Logger.warn(this, "Cleanup: error removing OS index '" + name + "': " + e.getMessage());
+ }
+ }
+ cleanupVersionedRows();
+ }
+
+ /**
+ * Deletes the {@code indicies} rows that reference either test index and clears the
+ * versioned-indices cache.
+ *
+ * <p>Each index name is matched on its own: {@code IDX_TWO} ends in {@code SUFFIX + 1}, so a
+ * single pattern built from {@code SUFFIX} never matches it and would leave behind the row
+ * written when a test activates {@code IDX_TWO}.
+ */
+ private void cleanupVersionedRows() {
+ try {
+ for (final String name : List.of(IDX_ONE, IDX_TWO)) {
+ new DotConnect()
+ .setSQL("DELETE FROM indicies WHERE index_name LIKE ?")
+ .addParam("%" + name + "%")
+ .loadResult();
+ }
+ APILocator.getVersionedIndicesAPI().clearCache();
+ } catch (final Exception e) {
+ Logger.warn(this, "Cleanup: error removing versioned DB rows: " + e.getMessage());
+ }
+ }
+}
From 3ebdc53f85d7baa5d19967156f99954c5c87c165 Mon Sep 17 00:00:00 2001
From: fabrizzio-dotCMS
Date: Tue, 23 Jun 2026 14:15:57 -0600
Subject: [PATCH 3/6] fix(sitesearch): use OpenSearch-format index settings for
OS site search
CI (OpenSearch Upgrade Suite) failed: every OSSiteSearchAPIIntegrationTest that
creates an index errored with "Failed to parse index settings". The OS impl was
loading es-sitesearch-settings.json, whose ES-only token-filter syntax (edgeNGram,
side) is rejected by the typed OpenSearch IndexSettings deserializer in
OSIndexAPIImpl.createIndex.
Add os-sitesearch-settings.json declaring the same analyzers (standard_content,
partial_content, comma_analyzer) in OpenSearch syntax (edge_ngram, no side), and
load it from OSSiteSearchAPI.createSiteSearchIndex. The mapping is vendor-neutral
and reused as-is.
Co-Authored-By: Claude Opus 4.8 (1M context)
---
.../sitesearch/OSSiteSearchAPI.java | 6 ++-
.../resources/os-sitesearch-settings.json | 39 +++++++++++++++++++
2 files changed, 44 insertions(+), 1 deletion(-)
create mode 100644 dotCMS/src/main/resources/os-sitesearch-settings.json
diff --git a/dotCMS/src/enterprise/java/com/dotcms/enterprise/publishing/sitesearch/OSSiteSearchAPI.java b/dotCMS/src/enterprise/java/com/dotcms/enterprise/publishing/sitesearch/OSSiteSearchAPI.java
index c1d226d23460..3a5afe625bfe 100644
--- a/dotCMS/src/enterprise/java/com/dotcms/enterprise/publishing/sitesearch/OSSiteSearchAPI.java
+++ b/dotCMS/src/enterprise/java/com/dotcms/enterprise/publishing/sitesearch/OSSiteSearchAPI.java
@@ -411,7 +411,11 @@ public synchronized boolean createSiteSearchIndex(String indexName, final String
indexName = indexName.toLowerCase();
final ClassLoader classLoader = Thread.currentThread().getContextClassLoader();
- URL url = classLoader.getResource("es-sitesearch-settings.json");
+ // OpenSearch-format settings: the legacy es-sitesearch-settings.json uses ES-only token
+ // filter syntax (e.g. edgeNGram / side) that the typed OpenSearch IndexSettings deserializer
+ // rejects. os-sitesearch-settings.json declares the same analyzers (standard_content,
+ // partial_content) in OpenSearch syntax. The mapping is vendor-neutral and is reused as-is.
+ URL url = classLoader.getResource("os-sitesearch-settings.json");
final String settings = new String(com.liferay.util.FileUtil.getBytes(new File(url.getPath())));
url = classLoader.getResource("es-sitesearch-mapping.json");
final String mapping = new String(com.liferay.util.FileUtil.getBytes(new File(url.getPath())));
diff --git a/dotCMS/src/main/resources/os-sitesearch-settings.json b/dotCMS/src/main/resources/os-sitesearch-settings.json
new file mode 100644
index 000000000000..168e3e0bcb1c
--- /dev/null
+++ b/dotCMS/src/main/resources/os-sitesearch-settings.json
@@ -0,0 +1,39 @@
+{
+ "analysis": {
+ "filter": {
+ "content_ngrams": {
+ "type": "edge_ngram",
+ "min_gram": 1,
+ "max_gram": 10
+ },
+ "content_stemmer": {
+ "type": "stemmer",
+ "name": "english"
+ }
+ },
+ "analyzer": {
+ "standard_content": {
+ "type": "custom",
+ "tokenizer": "standard",
+ "filter": [
+ "lowercase",
+ "asciifolding",
+ "content_stemmer"
+ ]
+ },
+ "partial_content": {
+ "type": "custom",
+ "tokenizer": "standard",
+ "filter": [
+ "lowercase",
+ "asciifolding",
+ "content_ngrams"
+ ]
+ },
+ "comma_analyzer": {
+ "type": "pattern",
+ "pattern": ","
+ }
+ }
+ }
+}
From c0fb1ac336b98c8b05f0bcfcfed507b84f07b0f7 Mon Sep 17 00:00:00 2001
From: fabrizzio-dotCMS
Date: Tue, 23 Jun 2026 14:51:12 -0600
Subject: [PATCH 4/6] fix(sitesearch): apply OS site-search mapping to the
untagged physical index
The aggregation IT failed: mimeType aggregation hit "Text fields are not optimised
... use a keyword field". Root cause: createSiteSearchIndex delegated the mapping
PUT to MappingOperationsOS, which force-tags the physical name with `.os`. Site
search uses untagged logical names, so the mapping landed on a different (`.os`)
index while the real index kept the dynamic default mapping (string -> text),
breaking keyword aggregations.
Apply the mapping with a raw PUT /<index>/_mapping against the same untagged
physical name used by createIndex/search/put, and drop the MappingOperationsOS
dependency.
Co-Authored-By: Claude Opus 4.8 (1M context)
---
.../sitesearch/OSSiteSearchAPI.java | 42 ++++++++++++++-----
1 file changed, 31 insertions(+), 11 deletions(-)
diff --git a/dotCMS/src/enterprise/java/com/dotcms/enterprise/publishing/sitesearch/OSSiteSearchAPI.java b/dotCMS/src/enterprise/java/com/dotcms/enterprise/publishing/sitesearch/OSSiteSearchAPI.java
index 3a5afe625bfe..3196bb222d3f 100644
--- a/dotCMS/src/enterprise/java/com/dotcms/enterprise/publishing/sitesearch/OSSiteSearchAPI.java
+++ b/dotCMS/src/enterprise/java/com/dotcms/enterprise/publishing/sitesearch/OSSiteSearchAPI.java
@@ -23,7 +23,6 @@
import com.dotcms.content.index.domain.DotSearchException;
import com.dotcms.content.index.domain.SearchHit;
import com.dotcms.content.index.domain.SearchHits;
-import com.dotcms.content.index.opensearch.MappingOperationsOS;
import com.dotcms.content.index.opensearch.OSClientProvider;
import com.dotcms.content.index.opensearch.OSIndexAPIImpl;
import com.dotcms.enterprise.LicenseUtil;
@@ -125,24 +124,20 @@ public class OSSiteSearchAPI implements SiteSearchAPI {
private final OSClientProvider clientProvider;
private final IndexAPI indexApi;
- private final MappingOperationsOS mappingOperations;
/** CDI-managed constructor. */
@Inject
public OSSiteSearchAPI() {
this(CDIUtils.getBeanThrows(OSClientProvider.class),
- CDIUtils.getBeanThrows(OSIndexAPIImpl.class),
- CDIUtils.getBeanThrows(MappingOperationsOS.class));
+ CDIUtils.getBeanThrows(OSIndexAPIImpl.class));
}
/** Package-private constructor for testing. */
@VisibleForTesting
OSSiteSearchAPI(final OSClientProvider clientProvider,
- final IndexAPI indexApi,
- final MappingOperationsOS mappingOperations) {
+ final IndexAPI indexApi) {
this.clientProvider = clientProvider;
this.indexApi = indexApi;
- this.mappingOperations = mappingOperations;
}
// =========================================================================
@@ -430,14 +425,39 @@ public synchronized boolean createSiteSearchIndex(String indexName, final String
indexApi.createAlias(indexName, alias);
}
- try {
- mappingOperations.putMapping(List.of(indexName), mapping);
+ putMapping(indexName, mapping);
+
+ return true;
+ }
+
+ /**
+ * Applies the mapping to the site-search index via a raw {@code PUT /<index>/_mapping}.
+ *
+ * <p>Done here rather than via {@code MappingOperationsOS} on purpose: that helper force-tags the
+ * physical name with {@code .os}, which would target a different index than the untagged one this
+ * class creates and queries (see the class "Index naming" note), leaving the real index on the
+ * dynamic default mapping (string fields become {@code text}, which then breaks keyword
+ * aggregations such as {@code mimeType}). Forwarding to the same untagged physical name used by
+ * {@code createIndex}/search/put keeps the mapping on the index that is actually hit.
+ */
+ private void putMapping(final String indexName, final String mapping) throws DotSearchException {
+ final String endpoint = "/" + physicalName(indexName) + "/_mapping";
+ try (final Response response = clientProvider.getClient().generic()
+ .execute(Requests.builder()
+ .method("PUT")
+ .endpoint(endpoint)
+ .body(Bodies.json(mapping))
+ .build())) {
+ final int status = response.getStatus();
+ if (status < 200 || status >= 300) {
+ throw new DotSearchException("Error applying mapping to OpenSearch site search index "
+ + indexName + " — HTTP " + status + " — "
+ + response.getBody().map(Body::bodyAsString).orElse(""));
+ }
} catch (final IOException e) {
throw new DotSearchException("Error applying mapping to OpenSearch site search index: "
+ e.getMessage(), e);
}
-
- return true;
}
@Override
From aadbb3bc5ec4b80d6e773eb28a146959895e2db7 Mon Sep 17 00:00:00 2001
From: fabrizzio-dotCMS
Date: Tue, 23 Jun 2026 16:19:15 -0600
Subject: [PATCH 5/6] test(sitesearch): integration test for SiteSearchWebAPI
view tool (#35786)
Adds SiteSearchWebAPITest covering the view-tool surface affected by the
neutral-aggregation refactor: search() (default-index, alias, pagination, empty
and error paths) with full SiteSearchResults/SiteSearchResult field assertions;
getAggregations() over the neutral Aggregation/AggregationBucket tree (terms,
nested top_hits, numeric-histogram getKeyAsNumber); and getFacets() across all
three legacy wrappers (string-terms, count-histogram, plain Facet fallback).
Registered in MainSuite1b alongside ContentSearchToolTest.
Also a minor List.getFirst() cleanup in SiteSearchAPIImpl.
Co-Authored-By: Claude Opus 4.8 (1M context)
---
.../sitesearch/SiteSearchAPIImpl.java | 4 +-
.../src/test/java/com/dotcms/MainSuite1b.java | 1 +
.../viewtool/SiteSearchWebAPITest.java | 494 ++++++++++++++++++
3 files changed, 497 insertions(+), 2 deletions(-)
create mode 100644 dotcms-integration/src/test/java/com/dotmarketing/sitesearch/viewtool/SiteSearchWebAPITest.java
diff --git a/dotCMS/src/enterprise/java/com/dotcms/enterprise/publishing/sitesearch/SiteSearchAPIImpl.java b/dotCMS/src/enterprise/java/com/dotcms/enterprise/publishing/sitesearch/SiteSearchAPIImpl.java
index 750d93d16bc0..43e020d2a2ff 100644
--- a/dotCMS/src/enterprise/java/com/dotcms/enterprise/publishing/sitesearch/SiteSearchAPIImpl.java
+++ b/dotCMS/src/enterprise/java/com/dotcms/enterprise/publishing/sitesearch/SiteSearchAPIImpl.java
@@ -103,7 +103,7 @@ public SiteSearchAPIImpl() {
public List listIndices() {
final List providers = router.writeProviders();
if (providers.size() == 1) {
- return providers.get(0).listIndices();
+ return providers.getFirst().listIndices();
}
final Set merged = new LinkedHashSet<>(esImpl.listIndices());
merged.addAll(osImpl.listIndices());
@@ -114,7 +114,7 @@ public List listIndices() {
public List listClosedIndices() {
final List providers = router.writeProviders();
if (providers.size() == 1) {
- return providers.get(0).listClosedIndices();
+ return providers.getFirst().listClosedIndices();
}
final Set merged = new LinkedHashSet<>(esImpl.listClosedIndices());
merged.addAll(osImpl.listClosedIndices());
diff --git a/dotcms-integration/src/test/java/com/dotcms/MainSuite1b.java b/dotcms-integration/src/test/java/com/dotcms/MainSuite1b.java
index b1e5bf853a22..fa9b83f71785 100644
--- a/dotcms-integration/src/test/java/com/dotcms/MainSuite1b.java
+++ b/dotcms-integration/src/test/java/com/dotcms/MainSuite1b.java
@@ -52,6 +52,7 @@
com.dotcms.rendering.velocity.viewtools.content.ContentMapTest.class,
com.dotcms.rendering.velocity.viewtools.content.ContentToolTest.class,
com.dotcms.rendering.velocity.viewtools.ContentSearchToolTest.class,
+ com.dotmarketing.sitesearch.viewtool.SiteSearchWebAPITest.class,
com.dotcms.rendering.velocity.viewtools.WorkflowToolTest.class,
com.dotcms.rendering.velocity.viewtools.WebsiteToolTest.class,
com.dotcms.rendering.velocity.viewtools.LanguageWebAPITest.class,
diff --git a/dotcms-integration/src/test/java/com/dotmarketing/sitesearch/viewtool/SiteSearchWebAPITest.java b/dotcms-integration/src/test/java/com/dotmarketing/sitesearch/viewtool/SiteSearchWebAPITest.java
new file mode 100644
index 000000000000..215d94a58fa2
--- /dev/null
+++ b/dotcms-integration/src/test/java/com/dotmarketing/sitesearch/viewtool/SiteSearchWebAPITest.java
@@ -0,0 +1,494 @@
+package com.dotmarketing.sitesearch.viewtool;
+
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertFalse;
+import static org.junit.Assert.assertNotNull;
+import static org.junit.Assert.assertNull;
+import static org.junit.Assert.assertTrue;
+import static org.mockito.Mockito.mock;
+import static org.mockito.Mockito.when;
+
+import com.dotcms.IntegrationTestBase;
+import com.dotcms.LicenseTestUtil;
+import com.dotcms.content.index.domain.Aggregation;
+import com.dotcms.content.index.domain.AggregationBucket;
+import com.dotcms.content.index.domain.SearchHit;
+import com.dotcms.enterprise.publishing.sitesearch.SiteSearchResult;
+import com.dotcms.enterprise.publishing.sitesearch.SiteSearchResults;
+import com.dotcms.util.IntegrationTestInitService;
+import com.dotmarketing.business.APILocator;
+import com.dotmarketing.sitesearch.business.SiteSearchAPI;
+import com.dotmarketing.sitesearch.viewtool.SiteSearchWebAPI.Facet;
+import com.dotmarketing.sitesearch.viewtool.SiteSearchWebAPI.InternalWrapperCountDateHistogramFacet;
+import com.dotmarketing.sitesearch.viewtool.SiteSearchWebAPI.InternalWrapperStringTermsFacet;
+import com.dotmarketing.util.Logger;
+import java.util.List;
+import java.util.Map;
+import java.util.Set;
+import javax.servlet.http.HttpServletRequest;
+import javax.servlet.http.HttpServletResponse;
+import org.apache.velocity.tools.view.context.ViewContext;
+import org.junit.AfterClass;
+import org.junit.BeforeClass;
+import org.junit.Test;
+
+/**
+ * Integration test for the {@link SiteSearchWebAPI} Velocity view tool, modelled on
+ * {@code ContentSearchToolTest}.
+ *
+ * <p>Exercises the public view-tool surface end-to-end against a live search backend after the
+ * Elasticsearch → OpenSearch neutral-aggregation refactor (#35786), with emphasis on the fields of
+ * the POJOs returned by the refactored methods:
+ *
+ * <ul>
+ *   <li>{@code search(...)} → {@link SiteSearchResults} / {@link SiteSearchResult} fields, the
+ *       alias path, the default-index path, pagination and error states.
+ *   <li>{@code getAggregations(...)} → the neutral {@link Aggregation} / {@link AggregationBucket}
+ *       tree: name/type/buckets, doc counts, {@code getKeyAsNumber} (numeric histogram), and the
+ *       nested {@code top_hits} {@link SearchHit}s.
+ *   <li>{@code getFacets(...)} → all three legacy wrappers: string-terms, date/numeric-histogram and
+ *       the plain {@link Facet} fallback, plus their entry POJOs.
+ * </ul>
+ *
+ * <p>The tool resolves its backend through {@code APILocator.getSiteSearchAPI()} — now the
+ * {@code SiteSearchAPIImpl} phase router — so this also proves the router wiring did not break the
+ * legacy view-tool contract. Runs in the default integration profile (migration Phase 0 →
+ * Elasticsearch), like {@code ContentSearchToolTest}; no OpenSearch container is required.
+ *
+ * @author Fabrizio Araya
+ */
+public class SiteSearchWebAPITest extends IntegrationTestBase {
+
+ private static final long SUFFIX = System.currentTimeMillis();
+ private static final String IDX = "sitesearch_" + SUFFIX;
+ private static final String ALIAS = "ss_it_alias_" + SUFFIX;
+
+ /** Unique token embedded in every indexed doc so the text query matches only this run's data. */
+ private static final String TOKEN = "ssqa" + SUFFIX;
+
+ private static final String MIME_HTML = "text/html";
+ private static final String MIME_PDF = "application/pdf";
+ private static final Set EXPECTED_MIMES = Set.of(MIME_HTML, MIME_PDF);
+
+ /** 3 html docs + 2 pdf docs = 5 docs, all carrying TOKEN. */
+ private static final int HTML_DOCS = 3;
+ private static final int PDF_DOCS = 2;
+ private static final int TOTAL_DOCS = HTML_DOCS + PDF_DOCS;
+
+ // ---- Queries (JSON, so search() skips the request-host lookup) -----------------------------
+
+ private static final String SEARCH_TOKEN =
+ "{\"query\":{\"query_string\":{\"query\":\"" + "TOKEN_PLACEHOLDER"
+ + "\",\"default_field\":\"*\"}}}";
+
+ private static final String TERMS_AGG =
+ "{\"size\":0,\"aggs\":{\"by_mime\":{\"terms\":{\"field\":\"mimeType\",\"size\":10}}}}";
+
+ private static final String NESTED_AGG =
+ "{\"size\":0,\"aggs\":{\"by_mime\":{\"terms\":{\"field\":\"mimeType\",\"size\":10},"
+ + "\"aggs\":{\"top_docs\":{\"top_hits\":{\"size\":2}}}}}}";
+
+ private static final String HISTO_AGG =
+ "{\"size\":0,\"aggs\":{\"by_len\":{\"histogram\":{\"field\":\"contentLength\","
+ + "\"interval\":25}}}}";
+
+ /** Query matches no doc, so the terms aggregation comes back with empty buckets. */
+ private static final String EMPTY_AGG =
+ "{\"size\":0,\"query\":{\"term\":{\"mimeType\":\"zzz/none\"}},"
+ + "\"aggs\":{\"empty\":{\"terms\":{\"field\":\"mimeType\",\"size\":10}}}}";
+
+ private static SiteSearchAPI siteSearchAPI;
+
+ @BeforeClass
+ public static void prepare() throws Exception {
+ IntegrationTestInitService.getInstance().init();
+ LicenseTestUtil.getLicense();
+
+ siteSearchAPI = APILocator.getSiteSearchAPI();
+
+ // Create the index WITH an alias (so the alias search path is exercised) and activate it as
+ // the default (so the default-index search path is exercised).
+ siteSearchAPI.createSiteSearchIndex(IDX, ALIAS, 1);
+ siteSearchAPI.activateIndex(IDX);
+
+ for (int i = 0; i < TOTAL_DOCS; i++) {
+ final boolean html = i < HTML_DOCS;
+ final SiteSearchResult doc = new SiteSearchResult();
+ doc.setId("ss-it-" + SUFFIX + "-" + i);
+ doc.setUrl("/site-search-webapi-it/" + i);
+ doc.setTitle("Site Search WebAPI IT doc " + i);
+ doc.setHost("demo.dotcms.com");
+ doc.setAuthor("qa-author-" + i);
+ doc.setMimeType(html ? MIME_HTML : MIME_PDF);
+ // Vary the body length so the numeric histogram on contentLength spreads over buckets.
+ doc.setContent("dotcms site search viewtool integration " + TOKEN
+ + " ".repeat(i * 30));
+ doc.setContentLength(doc.getContent().length());
+ siteSearchAPI.putToIndex(IDX, doc, "content");
+ }
+ }
+
+ @AfterClass
+ public static void cleanup() {
+ try {
+ siteSearchAPI.deactivateIndex(IDX);
+ } catch (final Exception e) {
+ Logger.warn(SiteSearchWebAPITest.class, "Cleanup: deactivate failed: " + e.getMessage());
+ }
+ try {
+ APILocator.getESIndexAPI()
+ .delete(APILocator.getESIndexAPI().getNameWithClusterIDPrefix(IDX));
+ } catch (final Exception e) {
+ Logger.warn(SiteSearchWebAPITest.class, "Cleanup: delete failed: " + e.getMessage());
+ }
+ }
+
+ /** Builds a {@link SiteSearchWebAPI} initialized with a mock request/response. */
+ private SiteSearchWebAPI siteSearchWebAPI() {
+ final ViewContext viewContext = mock(ViewContext.class);
+ final HttpServletRequest request = mock(HttpServletRequest.class);
+ final HttpServletResponse response = mock(HttpServletResponse.class);
+ when(viewContext.getRequest()).thenReturn(request);
+ when(viewContext.getResponse()).thenReturn(response);
+
+ final SiteSearchWebAPI tool = new SiteSearchWebAPI();
+ tool.init(viewContext);
+ return tool;
+ }
+
+ private static String searchToken() {
+ return SEARCH_TOKEN.replace("TOKEN_PLACEHOLDER", TOKEN);
+ }
+
+ // =========================================================================
+ // listSearchIndicies
+ // =========================================================================
+
+ /**
+ * Given scenario: a populated, active site-search index.
+ * Expected: listSearchIndicies() (and its legacy-typo alias) returns the created index.
+ */
+ @Test
+ public void listSearchIndicies_containsCreatedIndex() {
+ final SiteSearchWebAPI tool = siteSearchWebAPI();
+
+ assertTrue("listSearchIndicies() must contain the created index",
+ tool.listSearchIndicies().contains(IDX));
+ assertTrue("legacy-typo alias listSearchIncidies() must behave identically",
+ tool.listSearchIncidies().contains(IDX));
+
+ Logger.info(this, "✅ listSearchIndicies_containsCreatedIndex passed");
+ }
+
+ // =========================================================================
+ // search — SiteSearchResults / SiteSearchResult field coverage
+ // =========================================================================
+
+ /**
+ * Given scenario: 5 docs carrying TOKEN in the default (active) index.
+ * Expected: the default-index search (3-arg) populates every SiteSearchResults field and each
+ * SiteSearchResult exposes id/url/title/mimeType/score.
+ */
+ @Test
+ public void search_defaultIndex_populatesResultFields() throws Exception {
+ final SiteSearchWebAPI tool = siteSearchWebAPI();
+
+ final SiteSearchResults results = tool.search(searchToken(), 0, 10);
+
+ assertNull("Search must not return an error: " + results.getError(), results.getError());
+ assertEquals("All TOKEN docs must be counted", TOTAL_DOCS, results.getTotalResults());
+ assertEquals("getTotalHits() alias must match getTotalResults()",
+ results.getTotalResults(), results.getTotalHits());
+ assertEquals("Result rows must match the total (under the page size)",
+ TOTAL_DOCS, results.getResults().size());
+ assertTrue("maxScore must be positive for a matching query", results.getMaxScore() > 0);
+ assertEquals("offset must reflect the requested start", 0, results.getOffset());
+ assertEquals("start alias must match offset", results.getOffset(), results.getStart());
+ assertEquals("limit must reflect the requested rows", 10, results.getLimit());
+ assertNotNull("query echo must be set", results.getQuery());
+ assertNotNull("took must be set", results.getTook());
+
+ for (final SiteSearchResult hit : results.getResults()) {
+ assertNotNull("each hit must carry an id", hit.getId());
+ assertTrue("each hit id must belong to this run", hit.getId().startsWith("ss-it-" + SUFFIX));
+ assertNotNull("each hit must carry a url", hit.getUrl());
+ assertNotNull("each hit must carry a title", hit.getTitle());
+ assertTrue("each hit mimeType must be one of the indexed types",
+ EXPECTED_MIMES.contains(hit.getMimeType()));
+ assertTrue("each hit must have a positive score", hit.getScore() > 0);
+ }
+
+ Logger.info(this, "✅ search_defaultIndex_populatesResultFields passed – hits: "
+ + results.getTotalResults());
+ }
+
+ /**
+ * Given scenario: the index was created with an alias.
+ * Expected: the 4-arg alias search resolves the alias to the backing index and returns the docs.
+ */
+ @Test
+ public void search_byAlias_resolvesIndex() {
+ final SiteSearchWebAPI tool = siteSearchWebAPI();
+
+ final SiteSearchResults results = tool.search(ALIAS, searchToken(), 0, 10);
+
+ assertNull("Alias search must not return an error: " + results.getError(),
+ results.getError());
+ assertEquals("Alias search must reach the same docs", TOTAL_DOCS, results.getTotalResults());
+
+ Logger.info(this, "✅ search_byAlias_resolvesIndex passed");
+ }
+
+ /**
+ * Given scenario: a JSON body that caps the page size to 2.
+ * Expected: the returned rows are capped to the page size while the total still reflects all
+ * matches — covering the offset/limit/totalResults fields together.
+ */
+ @Test
+ public void search_pagination_capsReturnedRows() throws Exception {
+ final SiteSearchWebAPI tool = siteSearchWebAPI();
+
+ final String paged = "{\"size\":2,\"query\":{\"query_string\":{\"query\":\"" + TOKEN
+ + "\",\"default_field\":\"*\"}}}";
+ final SiteSearchResults results = tool.search(paged, 0, 2);
+
+ assertNull("Paged search must not error: " + results.getError(), results.getError());
+ assertEquals("Total must still reflect every match", TOTAL_DOCS, results.getTotalResults());
+ assertTrue("Returned rows must be capped by the page size",
+ results.getResults().size() <= 2);
+
+ Logger.info(this, "✅ search_pagination_capsReturnedRows passed – returned: "
+ + results.getResults().size());
+ }
+
+ /**
+ * Given scenario: a query for a token that matches nothing.
+ * Expected: zero results, an empty result list and no error (a clean empty response).
+ */
+ @Test
+ public void search_noMatch_returnsEmptyWithoutError() throws Exception {
+ final SiteSearchWebAPI tool = siteSearchWebAPI();
+
+ final String noMatch = "{\"query\":{\"query_string\":{\"query\":\"zzznomatchzzz" + SUFFIX
+ + "\",\"default_field\":\"*\"}}}";
+ final SiteSearchResults results = tool.search(noMatch, 0, 10);
+
+ assertNull("No-match search must not error", results.getError());
+ assertEquals("No-match search must count zero", 0, results.getTotalResults());
+ assertTrue("No-match search must return no rows", results.getResults().isEmpty());
+
+ Logger.info(this, "✅ search_noMatch_returnsEmptyWithoutError passed");
+ }
+
+ /**
+ * Given scenario: a null query.
+ * Expected: the tool reports an error on the SiteSearchResults rather than throwing.
+ */
+ @Test
+ public void search_nullQuery_setsError() throws Exception {
+ final SiteSearchWebAPI tool = siteSearchWebAPI();
+
+ final SiteSearchResults results = tool.search(null, 0, 10);
+
+ assertNotNull("A null query must surface an error", results.getError());
+ Logger.info(this, "✅ search_nullQuery_setsError passed – error: " + results.getError());
+ }
+
+ // =========================================================================
+ // getAggregations — Aggregation / AggregationBucket field coverage
+ // =========================================================================
+
+ /**
+ * Given scenario: 3 html + 2 pdf docs.
+ * Expected: the terms aggregation on mimeType exposes a populated neutral Aggregation — name,
+ * type, two buckets with correct doc counts, string keys, null numeric keys (non-numeric) and no
+ * top-hits — covering the multi-bucket AggregationBucket accessors.
+ */
+ @Test
+ public void getAggregations_termsBuckets_fieldsPopulated() throws Exception {
+ final SiteSearchWebAPI tool = siteSearchWebAPI();
+
+ final Map aggregations = tool.getAggregations(IDX, TERMS_AGG);
+
+ assertNotNull("Aggregations map must not be null", aggregations);
+ final Aggregation byMime = aggregations.get("by_mime");
+ assertNotNull("'by_mime' aggregation must be present", byMime);
+ assertEquals("aggregation name must round-trip", "by_mime", byMime.getName());
+ assertNotNull("aggregation type must be reported", byMime.getType());
+ assertNull("a terms aggregation carries no top-hits", byMime.getHits());
+ assertEquals("there must be one bucket per mimeType", 2, byMime.getBuckets().size());
+
+ long htmlCount = -1;
+ long pdfCount = -1;
+ for (final AggregationBucket bucket : byMime.getBuckets()) {
+ assertTrue("bucket key must be a known mimeType",
+ EXPECTED_MIMES.contains(bucket.getKey()));
+ assertEquals("getKeyAsString must mirror getKey", bucket.getKey(),
+ bucket.getKeyAsString());
+ assertNull("a non-numeric key must yield a null number", bucket.getKeyAsNumber());
+ assertTrue("each bucket must carry documents", bucket.getDocCount() > 0);
+ assertTrue("a terms bucket has no sub-aggregations here",
+ bucket.getAggregations().isEmpty());
+ if (MIME_HTML.equals(bucket.getKey())) {
+ htmlCount = bucket.getDocCount();
+ } else if (MIME_PDF.equals(bucket.getKey())) {
+ pdfCount = bucket.getDocCount();
+ }
+ }
+ assertEquals("html bucket must count the html docs", HTML_DOCS, htmlCount);
+ assertEquals("pdf bucket must count the pdf docs", PDF_DOCS, pdfCount);
+
+ Logger.info(this, "✅ getAggregations_termsBuckets_fieldsPopulated passed");
+ }
+
+ /**
+ * Given scenario: a terms aggregation with a nested top_hits sub-aggregation.
+ * Expected: the neutral tree preserves the nested {@code top_docs} as an Aggregation that carries
+ * SearchHits, and each SearchHit exposes id and source — covering getHits()/SearchHit fields and
+ * the nested getAggregations() path.
+ */
+ @Test
+ public void getAggregations_nestedTopHits_preserved() throws Exception {
+ final SiteSearchWebAPI tool = siteSearchWebAPI();
+
+ final Map aggregations = tool.getAggregations(IDX, NESTED_AGG);
+ final Aggregation byMime = aggregations.get("by_mime");
+ assertNotNull("'by_mime' aggregation must be present", byMime);
+ assertFalse("'by_mime' must have buckets", byMime.getBuckets().isEmpty());
+
+ final AggregationBucket firstBucket = byMime.getBuckets().getFirst();
+ final Aggregation topDocs = firstBucket.getAggregations().get("top_docs");
+ assertNotNull("nested top_hits sub-aggregation must be preserved", topDocs);
+ assertNotNull("top_hits must carry a SearchHits container", topDocs.getHits());
+
+ final List hits = topDocs.getHits().getHits();
+ assertFalse("top_hits must carry at least one hit", hits.isEmpty());
+ final SearchHit hit = hits.getFirst();
+ assertNotNull("each top-hit must expose an id", hit.getId());
+ assertFalse("each top-hit must expose its source document",
+ hit.getSourceAsMap().isEmpty());
+
+ Logger.info(this, "✅ getAggregations_nestedTopHits_preserved passed – topHits: " + hits.size());
+ }
+
+ /**
+ * Given scenario: a numeric histogram on the long field {@code contentLength}.
+ * Expected: the buckets carry numeric keys, so {@link AggregationBucket#getKeyAsNumber()} returns
+ * a non-null Number — covering the numeric-key path (distinct from the non-numeric terms keys).
+ */
+ @Test
+ public void getAggregations_numericHistogram_keyAsNumber() throws Exception {
+ final SiteSearchWebAPI tool = siteSearchWebAPI();
+
+ final Map aggregations = tool.getAggregations(IDX, HISTO_AGG);
+ final Aggregation byLen = aggregations.get("by_len");
+ assertNotNull("'by_len' histogram aggregation must be present", byLen);
+ assertTrue("histogram type must be reported as a histogram",
+ byLen.getType().contains("histogram"));
+ assertFalse("histogram must produce buckets", byLen.getBuckets().isEmpty());
+
+ boolean sawPopulatedNumericBucket = false;
+ for (final AggregationBucket bucket : byLen.getBuckets()) {
+ assertNotNull("a histogram bucket key must be numeric", bucket.getKeyAsNumber());
+ if (bucket.getDocCount() > 0) {
+ sawPopulatedNumericBucket = true;
+ }
+ }
+ assertTrue("at least one histogram bucket must contain documents", sawPopulatedNumericBucket);
+
+ Logger.info(this, "✅ getAggregations_numericHistogram_keyAsNumber passed");
+ }
+
+ // =========================================================================
+ // getFacets — legacy wrapper coverage (terms / histogram / plain)
+ // =========================================================================
+
+ /**
+ * Given scenario: a terms aggregation with non-empty buckets.
+ * Expected: getFacets wraps it as an {@link InternalWrapperStringTermsFacet} exposing name/type
+ * and term entries with term + count — covering the legacy string-terms facet POJO.
+ */
+ @Test
+ public void getFacets_termsAggregation_wrapsAsStringTermsFacet() throws Exception {
+ final SiteSearchWebAPI tool = siteSearchWebAPI();
+
+ final Map facets = tool.getFacets(IDX, TERMS_AGG);
+ assertNotNull("Facets map must not be null", facets);
+
+ final Facet facet = facets.get("by_mime");
+ assertNotNull("'by_mime' facet must be present", facet);
+ assertEquals("facet name must round-trip", "by_mime", facet.getName());
+ assertNotNull("facet type must be reported", facet.getType());
+ assertTrue("non-empty terms aggregation must map to InternalWrapperStringTermsFacet",
+ facet instanceof InternalWrapperStringTermsFacet);
+
+ final InternalWrapperStringTermsFacet termsFacet = (InternalWrapperStringTermsFacet) facet;
+ assertEquals("there must be one entry per bucket", 2, termsFacet.entries().size());
+
+ long htmlCount = -1;
+ for (final var entry : termsFacet.entries()) {
+ assertTrue("entry term must be a known mimeType", EXPECTED_MIMES.contains(entry.getTerm()));
+ assertTrue("entry count must be positive", entry.getCount() > 0);
+ if (MIME_HTML.equals(entry.getTerm())) {
+ htmlCount = entry.getCount();
+ }
+ }
+ assertEquals("html term entry must count the html docs", HTML_DOCS, htmlCount);
+
+ Logger.info(this, "✅ getFacets_termsAggregation_wrapsAsStringTermsFacet passed");
+ }
+
+ /**
+ * Given scenario: a numeric histogram aggregation.
+ * Expected: getFacets wraps it as an {@link InternalWrapperCountDateHistogramFacet} exposing
+ * CountEntry rows with time (the numeric key) and count — covering the legacy histogram facet
+ * POJO and the {@code isHistogram} branch.
+ */
+ @Test
+ public void getFacets_histogramAggregation_wrapsAsCountHistogramFacet() throws Exception {
+ final SiteSearchWebAPI tool = siteSearchWebAPI();
+
+ final Map facets = tool.getFacets(IDX, HISTO_AGG);
+ final Facet facet = facets.get("by_len");
+ assertNotNull("'by_len' facet must be present", facet);
+ assertTrue("a histogram aggregation must map to InternalWrapperCountDateHistogramFacet",
+ facet instanceof InternalWrapperCountDateHistogramFacet);
+
+ final InternalWrapperCountDateHistogramFacet histoFacet =
+ (InternalWrapperCountDateHistogramFacet) facet;
+ assertFalse("histogram facet must expose count entries", histoFacet.entries().isEmpty());
+
+ boolean sawPopulatedEntry = false;
+ for (final var entry : histoFacet.entries()) {
+ assertTrue("entry time (numeric key) must be non-negative", entry.getTime() >= 0);
+ if (entry.getCount() > 0) {
+ sawPopulatedEntry = true;
+ }
+ }
+ assertTrue("at least one histogram entry must carry a count", sawPopulatedEntry);
+
+ Logger.info(this, "✅ getFacets_histogramAggregation_wrapsAsCountHistogramFacet passed");
+ }
+
+ /**
+ * Given scenario: a terms aggregation whose query matches no document (empty buckets).
+ * Expected: getFacets falls back to a plain {@link Facet} (neither wrapper), still exposing
+ * name and type — covering the empty-bucket branch.
+ */
+ @Test
+ public void getFacets_emptyBuckets_fallsBackToPlainFacet() throws Exception {
+ final SiteSearchWebAPI tool = siteSearchWebAPI();
+
+ final Map facets = tool.getFacets(IDX, EMPTY_AGG);
+ final Facet facet = facets.get("empty");
+ assertNotNull("'empty' facet must be present", facet);
+ assertEquals("facet name must round-trip", "empty", facet.getName());
+ assertNotNull("facet type must be reported", facet.getType());
+ assertFalse("an empty terms aggregation must NOT be a string-terms wrapper",
+ facet instanceof InternalWrapperStringTermsFacet);
+ assertFalse("an empty terms aggregation must NOT be a histogram wrapper",
+ facet instanceof InternalWrapperCountDateHistogramFacet);
+
+ Logger.info(this, "✅ getFacets_emptyBuckets_fallsBackToPlainFacet passed");
+ }
+}
From 5c0a4df5e21c5f930426d9574d40d7bb9d872e4a Mon Sep 17 00:00:00 2001
From: fabrizzio-dotCMS
Date: Tue, 23 Jun 2026 17:30:52 -0600
Subject: [PATCH 6/6] fix(sitesearch): isolate dual-write fan-out per provider
and pin ES mapping
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
Two OpenSearch site-search regressions surfaced by the dual-write fan-out:
1. Shared mutable result across the fan-out. SiteSearchAPIImpl.putToIndex
handed the same SiteSearchResult to both leaves. putToIndex mutates the
backing map (setKeywords rewrites "keywords" String -> List), so the first
leaf (ES) corrupted the input the second leaf (OS) then read, throwing
ClassCastException: EmptyList cannot be cast to String and silently dropping
every document from OpenSearch. The router now copies the result (and each
element of the batch overload) per provider.
2. Mapping fan-out leak. ESSiteSearchAPI.createSiteSearchIndex applied its
mapping through the phase-dispatched ESMappingAPIImpl.putMapping, which fanned
out a second time to OpenSearch using a .os-tagged physical name that
site-search OS indices never use -> HTTP 404. Pinned the ES leaf to
IndexTag.ES, restoring the single-fan-out invariant (SiteSearchAPIImpl already
drives OSSiteSearchAPI, which owns its own untagged OS index + mapping).
Adds SiteSearchDualWriteRouterIT (registered in OpenSearchUpgradeSuite) which
drives the router in Phase 1 dual-write and asserts documents reach OpenSearch
(single + batch) — the isolated OS-leaf IT cannot reproduce either bug.
Co-Authored-By: Claude Opus 4.8 (1M context)
---
.../sitesearch/ESSiteSearchAPI.java | 9 +-
.../sitesearch/SiteSearchAPIImpl.java | 31 ++-
.../com/dotcms/OpenSearchUpgradeSuite.java | 4 +-
.../SiteSearchDualWriteRouterIT.java | 246 ++++++++++++++++++
4 files changed, 285 insertions(+), 5 deletions(-)
create mode 100644 dotcms-integration/src/test/java/com/dotcms/enterprise/publishing/sitesearch/SiteSearchDualWriteRouterIT.java
diff --git a/dotCMS/src/enterprise/java/com/dotcms/enterprise/publishing/sitesearch/ESSiteSearchAPI.java b/dotCMS/src/enterprise/java/com/dotcms/enterprise/publishing/sitesearch/ESSiteSearchAPI.java
index 3197bd4b0735..25ab67a4ecff 100644
--- a/dotCMS/src/enterprise/java/com/dotcms/enterprise/publishing/sitesearch/ESSiteSearchAPI.java
+++ b/dotCMS/src/enterprise/java/com/dotcms/enterprise/publishing/sitesearch/ESSiteSearchAPI.java
@@ -14,6 +14,7 @@
import com.dotcms.content.elasticsearch.business.*;
import com.dotcms.content.elasticsearch.util.RestHighLevelClientProvider;
import com.dotcms.content.index.IndexAPI;
+import com.dotcms.content.index.IndexTag;
import com.dotcms.content.index.domain.Aggregation;
import com.dotcms.content.index.domain.DotSearchException;
import com.dotcms.enterprise.LicenseUtil;
@@ -392,8 +393,12 @@ public synchronized boolean createSiteSearchIndex(String indexName, String alias
indexApi.createAlias(indexName, alias);
}
- //put mappings
- mappingAPI.putMapping(indexName, mapping);
+ // Put mappings on the ES index only. ESMappingAPIImpl.putMapping(String, String) is
+ // phase-dispatched and would fan out to OpenSearch, but SiteSearchAPIImpl is already the
+ // single fan-out point for site search (it invokes OSSiteSearchAPI separately, which owns
+ // its own untagged OS index + mapping). Fanning out here too would re-issue the mapping to
+ // a `.os`-tagged physical name that site-search OS indices never use → HTTP 404. Pin to ES.
+ mappingAPI.putMapping(List.of(indexName), mapping, IndexTag.ES);
return true;
}
diff --git a/dotCMS/src/enterprise/java/com/dotcms/enterprise/publishing/sitesearch/SiteSearchAPIImpl.java b/dotCMS/src/enterprise/java/com/dotcms/enterprise/publishing/sitesearch/SiteSearchAPIImpl.java
index 43e020d2a2ff..7893f2b48dad 100644
--- a/dotCMS/src/enterprise/java/com/dotcms/enterprise/publishing/sitesearch/SiteSearchAPIImpl.java
+++ b/dotCMS/src/enterprise/java/com/dotcms/enterprise/publishing/sitesearch/SiteSearchAPIImpl.java
@@ -22,6 +22,7 @@
import java.io.IOException;
import java.text.ParseException;
import java.util.ArrayList;
+import java.util.HashMap;
import java.util.LinkedHashSet;
import java.util.List;
import java.util.Map;
@@ -222,12 +223,38 @@ public void deactivateIndex(final String indexName) throws DotDataException, IOE
@Override
public void putToIndex(final String idx, final SiteSearchResult res, final String resultType) {
- router.write(impl -> impl.putToIndex(idx, res, resultType));
+ // Each provider gets its own copy: putToIndex mutates the result's backing map
+ // (e.g. SiteSearchResult.setKeywords rewrites the "keywords" entry String -> List), so a
+ // shared instance would let the first provider in the fan-out corrupt the input the next
+ // provider reads — producing a ClassCastException on the second leaf. The lambda is invoked
+ // once per provider, so copyOf(res) is evaluated fresh from the untouched original each time.
+ router.write(impl -> impl.putToIndex(idx, copyOf(res), resultType));
}
@Override
public void putToIndex(final String idx, final List res, final String resultType) {
- router.write(impl -> impl.putToIndex(idx, res, resultType));
+ // See single-result overload: copy per provider so the fan-out never shares mutable state.
+ router.write(impl -> impl.putToIndex(idx, copyOf(res), resultType));
+ }
+
+ /**
+ * Shallow-copies a {@link SiteSearchResult} so the fan-out can hand an independent instance to
+ * each write provider. {@code putToIndex} mutates the backing map in place (HTML stripping,
+ * description derivation, {@code keywords} String→List rewrite); copying the map prevents one
+ * provider's mutations from leaking into the next provider's input. A shallow map copy is
+ * sufficient because every mutation replaces a map entry rather than mutating a value object.
+ */
+ private static SiteSearchResult copyOf(final SiteSearchResult res) {
+ return new SiteSearchResult(new HashMap<>(res.getMap()));
+ }
+
+ /** Copies each element of a result batch — see {@link #copyOf(SiteSearchResult)}. */
+ private static List copyOf(final List results) {
+ final List copies = new ArrayList<>(results.size());
+ for (final SiteSearchResult r : results) {
+ copies.add(copyOf(r));
+ }
+ return copies;
}
@Override
diff --git a/dotcms-integration/src/test/java/com/dotcms/OpenSearchUpgradeSuite.java b/dotcms-integration/src/test/java/com/dotcms/OpenSearchUpgradeSuite.java
index 29aa50e3e430..ce000e4e7f0f 100644
--- a/dotcms-integration/src/test/java/com/dotcms/OpenSearchUpgradeSuite.java
+++ b/dotcms-integration/src/test/java/com/dotcms/OpenSearchUpgradeSuite.java
@@ -13,6 +13,7 @@
import com.dotcms.content.index.opensearch.OSClientProviderIntegrationTest;
import com.dotcms.content.index.opensearch.OSSearchAPIImplIntegrationTest;
import com.dotcms.content.index.opensearch.OSSiteSearchAPIIntegrationTest;
+import com.dotcms.enterprise.publishing.sitesearch.SiteSearchDualWriteRouterIT;
import com.dotcms.junit.MainBaseSuite;
import org.junit.runner.RunWith;
import org.junit.runners.Suite.SuiteClasses;
@@ -48,7 +49,8 @@
ContentletIndexAPIImplMigrationIntegrationTest.class,
ContentletIndexAPIImplPhaseSwitchIntegrationTest.class,
OSSearchAPIImplIntegrationTest.class,
- OSSiteSearchAPIIntegrationTest.class
+ OSSiteSearchAPIIntegrationTest.class,
+ SiteSearchDualWriteRouterIT.class
})
public class OpenSearchUpgradeSuite {
}
\ No newline at end of file
diff --git a/dotcms-integration/src/test/java/com/dotcms/enterprise/publishing/sitesearch/SiteSearchDualWriteRouterIT.java b/dotcms-integration/src/test/java/com/dotcms/enterprise/publishing/sitesearch/SiteSearchDualWriteRouterIT.java
new file mode 100644
index 000000000000..a400ce4aae36
--- /dev/null
+++ b/dotcms-integration/src/test/java/com/dotcms/enterprise/publishing/sitesearch/SiteSearchDualWriteRouterIT.java
@@ -0,0 +1,246 @@
+package com.dotcms.enterprise.publishing.sitesearch;
+
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertNotNull;
+import static org.junit.Assert.assertNull;
+import static org.junit.Assert.assertTrue;
+import static org.junit.Assume.assumeFalse;
+
+import com.dotcms.DataProviderWeldRunner;
+import com.dotcms.IntegrationTestBase;
+import com.dotcms.LicenseTestUtil;
+import com.dotcms.content.elasticsearch.business.ESIndexAPI;
+import com.dotcms.content.index.IndexAPIImpl;
+import com.dotcms.content.index.IndexConfigHelper;
+import com.dotcms.content.index.opensearch.OSIndexAPIImpl;
+import com.dotcms.util.IntegrationTestInitService;
+import com.dotmarketing.business.APILocator;
+import com.dotmarketing.common.db.DotConnect;
+import com.dotmarketing.sitesearch.business.SiteSearchAPI;
+import com.dotmarketing.util.Config;
+import com.dotmarketing.util.Logger;
+import java.util.ArrayList;
+import java.util.List;
+import java.util.UUID;
+import javax.enterprise.context.ApplicationScoped;
+import javax.inject.Inject;
+import org.junit.After;
+import org.junit.Before;
+import org.junit.BeforeClass;
+import org.junit.Test;
+import org.junit.runner.RunWith;
+
+/**
+ * Integration tests that exercise Site Search through the phase-aware {@link SiteSearchAPIImpl}
+ * router in a dual-write phase, where every write fans out to both the
+ * Elasticsearch ({@link ESSiteSearchAPI}) and OpenSearch ({@link OSSiteSearchAPI}) leaves.
+ *
+ * <p>These tests guard two regressions that only reproduce through the router fan-out — the
+ * isolated {@link com.dotcms.content.index.opensearch.OSSiteSearchAPIIntegrationTest} (which calls
+ * the OS leaf directly) cannot catch them:
+ *
+ * <ol>
+ *   <li><b>Shared mutable result across the fan-out.</b> {@code putToIndex} mutates the
+ *       {@link SiteSearchResult} map in place — notably {@link SiteSearchResult#setKeywords(String)}
+ *       rewrites the {@code keywords} entry from a {@code String} to a {@code List}. With a single
+ *       shared instance, the first leaf (ES) corrupted the input the second leaf (OS) then read,
+ *       producing {@code ClassCastException: EmptyList cannot be cast to String} on the OS write —
+ *       silently dropping every document from OpenSearch. The router now hands each
+ *       provider its own copy. This test asserts the document actually lands in OpenSearch.
+ *   <li><b>Mapping fan-out leak.</b> {@code createSiteSearchIndex} on the ES leaf applied
+ *       its mapping through the phase-dispatched {@code ESMappingAPIImpl.putMapping}, which fanned
+ *       out a second time to OpenSearch using a {@code .os}-tagged physical name that site-search OS
+ *       indices never use → HTTP 404. The create path is now ES-pinned; this test asserts a
+ *       router-driven create yields a fully functional, queryable OS index.
+ * </ol>
+ *
+ * <p>Runs only when ES and OS are separate clusters (dual-write requires two endpoints); skipped
+ * via {@link org.junit.Assume#assumeFalse} on the single-cluster {@code opensearch-upgrade}
+ * profile. Registered in {@link com.dotcms.OpenSearchUpgradeSuite}.
+ *
+ * <p>NOTE(review): the original comment ended with "Run with:" followed by a command block that is
+ * empty in this copy of the patch — restore the command from the upstream commit.
+ *
+ * @author Fabrizio Araya
+ */
+@ApplicationScoped
+@RunWith(DataProviderWeldRunner.class)
+public class SiteSearchDualWriteRouterIT extends IntegrationTestBase {
+
+    /** Phase 1 — dual-write, ES reads. Writes fan out to [ES, OS]; reads come from ES. */
+    private static final int PHASE_DUAL_WRITE_ES_READS = 1;
+
+    /** Number of documents written (and then looked up) by the batch test. */
+    private static final int BATCH_SIZE = 3;
+
+    /** First 8 hex characters of a random UUID; keeps this run's index and document ids unique. */
+    private static final String RUN_ID =
+            UUID.randomUUID().toString().replace("-", "").substring(0, 8);
+
+    /**
+     * Numeric suffix so the name matches the {@code sitesearch_<digits>} convention. The hash is
+     * widened to {@code long} before {@link Math#abs(long)} so that a hash of
+     * {@code Integer.MIN_VALUE} cannot produce a negative suffix.
+     */
+    private static final String SUFFIX = String.valueOf(Math.abs((long) RUN_ID.hashCode()));
+
+    /** Name of the site-search index created by every test in this class. */
+    private static final String IDX = "sitesearch_" + SUFFIX;
+
+    /** Document id (or id prefix, for the batch test) of the documents written by the tests. */
+    private static final String DOC_ID = "ss-dualwrite-it-" + RUN_ID;
+
+    /** OpenSearch leaf, queried directly to prove that a routed write reached OpenSearch. */
+    @Inject
+    private OSSiteSearchAPI osSiteSearchAPI;
+
+    /** OpenSearch index API; used by {@link #cleanupTestData()} to drop the test index. */
+    @Inject
+    private OSIndexAPIImpl osIndexAPI;
+
+    /** The phase-aware fan-out router under test. */
+    private SiteSearchAPI router;
+
+    // =======================================================================
+    // Lifecycle
+    // =======================================================================
+
+    /**
+     * Runs the integration-test init service and {@link LicenseTestUtil#getLicense()} once, before
+     * any test of this class.
+     */
+    @BeforeClass
+    public static void prepare() throws Exception {
+        IntegrationTestInitService.getInstance().init();
+        LicenseTestUtil.getLicense();
+    }
+
+    /**
+     * Skips the test on a single-cluster setup; otherwise resolves the router, removes leftovers
+     * of a previous run and switches the migration phase to dual-write.
+     */
+    @Before
+    public void setUp() {
+        // Dual-write fans out to both clusters; a single-cluster profile would collide on the
+        // shared untagged site-search name (and cannot host both leaves), so skip there.
+        assumeFalse("Requires separate ES and OS clusters for dual-write", esSameAsOs());
+        router = APILocator.getSiteSearchAPI();
+        cleanupTestData();
+        setPhase(PHASE_DUAL_WRITE_ES_READS);
+    }
+
+    /** Clears the migration-phase flag again and removes everything the test created. */
+    @After
+    public void tearDown() {
+        setPhase(null);
+        cleanupTestData();
+    }
+
+    // =======================================================================
+    // Tests
+    // =======================================================================
+
+    /**
+     * Given scenario: Phase 1 (dual-write). An index and a single document with {@code keywords}
+     * set are written through the router, fanning out to ES then OS on the same result instance.
+     *
+     * <p>Expected: the document reaches OpenSearch (no {@code ClassCastException} on the OS leaf)
+     * and is searchable through the router's ES read path — proving the dual-write completed on
+     * both backends. {@code keywords} round-trips as a {@code List}.
+     */
+    @Test
+    public void test_dualWritePutToIndex_documentReachesBothBackends() throws Exception {
+        router.createSiteSearchIndex(IDX, null, 1);
+
+        final SiteSearchResult doc = new SiteSearchResult();
+        doc.setId(DOC_ID);
+        doc.setUrl("/ss-dualwrite-it/" + RUN_ID);
+        doc.setTitle("Dual-write Site Search IT " + RUN_ID);
+        doc.setMimeType("text/html");
+        doc.setContent("dotcms dual write roundtrip " + RUN_ID);
+        doc.setContentLength(doc.getContent().length());
+        // The exact Bug 1 trigger: keywords enters the map as a raw String. The first leaf in the
+        // fan-out rewrites it to a List; the second leaf must not see that mutation.
+        doc.getMap().put("keywords", "alpha, beta");
+
+        router.putToIndex(IDX, doc, "content");
+
+        // Bug 1 — OpenSearch must have received the document (unpatched: ClassCastException → null).
+        final SiteSearchResult fromOs = osSiteSearchAPI.getFromIndex(IDX, DOC_ID);
+        assertNotNull("Document must be retrievable from OpenSearch after dual-write", fromOs);
+        assertEquals("Document id must match in OpenSearch", DOC_ID, fromOs.getId());
+        assertEquals("keywords must round-trip as a trimmed list",
+                List.of("alpha", "beta"), fromOs.getKeywords());
+
+        // The dual-write also reached ES: in Phase 1 the router reads from ES.
+        final SiteSearchResults esRead = router.search(IDX, "roundtrip", 0, 10);
+        assertNull("ES read must not error: " + esRead.getError(), esRead.getError());
+        assertTrue("Document must be searchable via the router's ES read path",
+                esRead.getTotalResults() >= 1);
+
+        Logger.info(this, "✅ test_dualWritePutToIndex_documentReachesBothBackends passed");
+    }
+
+    /**
+     * Given scenario: Phase 1 (dual-write). A batch of documents is written through the
+     * {@code putToIndex(String, List, String)} router overload. This exercises the list fan-out
+     * path, where each provider must receive its own copy of every result.
+     *
+     * <p>Expected: every document lands in OpenSearch.
+     */
+    @Test
+    public void test_dualWriteBatchPutToIndex_allDocumentsReachOpenSearch() throws Exception {
+        router.createSiteSearchIndex(IDX, null, 1);
+
+        final List<SiteSearchResult> docs = new ArrayList<>();
+        for (int i = 0; i < BATCH_SIZE; i++) {
+            final SiteSearchResult doc = new SiteSearchResult();
+            doc.setId(DOC_ID + "-" + i);
+            doc.setUrl("/ss-dualwrite-batch/" + RUN_ID + "/" + i);
+            doc.setTitle("Batch doc " + i);
+            doc.setMimeType("text/html");
+            doc.setContent("dotcms dual write batch sample " + RUN_ID);
+            doc.setContentLength(doc.getContent().length());
+            // Same raw-String keywords trigger as the single-document test, once per element.
+            doc.getMap().put("keywords", "kw" + i + ", shared");
+            docs.add(doc);
+        }
+
+        router.putToIndex(IDX, docs, "content");
+
+        for (int i = 0; i < BATCH_SIZE; i++) {
+            final String id = DOC_ID + "-" + i;
+            assertNotNull("Batch document '" + id + "' must reach OpenSearch",
+                    osSiteSearchAPI.getFromIndex(IDX, id));
+        }
+
+        Logger.info(this, "✅ test_dualWriteBatchPutToIndex_allDocumentsReachOpenSearch passed");
+    }
+
+    // =======================================================================
+    // Helpers
+    // =======================================================================
+
+    /**
+     * True when the ES and OS clients are configured against the same cluster endpoint (the
+     * single-cluster {@code opensearch-upgrade} profile). Mirrors the gate used by the core
+     * migration ITs.
+     *
+     * @return {@code true} when the two configured endpoint strings are equal after trimming,
+     *         ignoring case
+     */
+    private static boolean esSameAsOs() {
+        final String esEndpoint = Config.getStringProperty("DOT_ES_ENDPOINTS",
+                "http://localhost:9207");
+        final String osEndpoint = Config.getStringProperty("OS_ENDPOINTS",
+                "http://localhost:9201");
+        return esEndpoint.trim().equalsIgnoreCase(osEndpoint.trim());
+    }
+
+    /**
+     * Writes the migration-phase flag.
+     *
+     * @param ordinal phase ordinal to store as a string, or {@code null} to store {@code null}
+     *                under the flag key
+     */
+    private static void setPhase(final Integer ordinal) {
+        Config.setProperty(IndexConfigHelper.MigrationPhase.FLAG_KEY,
+                ordinal == null ? null : String.valueOf(ordinal));
+    }
+
+    /**
+     * Best-effort removal of everything a test may have created: the OS index, the ES index and
+     * the matching rows of the {@code indicies} table. Each step is attempted independently; a
+     * failure is logged as a warning and never fails the test.
+     */
+    private synchronized void cleanupTestData() {
+        try {
+            if (osIndexAPI.indexExists(IDX)) {
+                osIndexAPI.delete(IDX);
+            }
+        } catch (final Exception e) {
+            Logger.warn(this, "Cleanup: error removing OS index '" + IDX + "': " + e.getMessage());
+        }
+        // The dual-write create also lands an ES index; remove it directly on the ES cluster.
+        try {
+            final ESIndexAPI esIndex = ((IndexAPIImpl) APILocator.getESIndexAPI()).esImpl();
+            if (esIndex.indexExists(IDX)) {
+                esIndex.delete(IDX);
+            }
+        } catch (final Exception e) {
+            Logger.warn(this, "Cleanup: error removing ES index '" + IDX + "': " + e.getMessage());
+        }
+        try {
+            // Matches any row whose index name contains this run's numeric suffix.
+            new DotConnect()
+                    .setSQL("DELETE FROM indicies WHERE index_name LIKE ?")
+                    .addParam("%" + SUFFIX + "%")
+                    .loadResult();
+            APILocator.getVersionedIndicesAPI().clearCache();
+        } catch (final Exception e) {
+            Logger.warn(this, "Cleanup: error removing versioned DB rows: " + e.getMessage());
+        }
+    }
+}