diff --git a/dotCMS/src/enterprise/java/com/dotcms/enterprise/publishing/sitesearch/ESSiteSearchAPI.java b/dotCMS/src/enterprise/java/com/dotcms/enterprise/publishing/sitesearch/ESSiteSearchAPI.java index 09f1f54c96b7..25ab67a4ecff 100644 --- a/dotCMS/src/enterprise/java/com/dotcms/enterprise/publishing/sitesearch/ESSiteSearchAPI.java +++ b/dotCMS/src/enterprise/java/com/dotcms/enterprise/publishing/sitesearch/ESSiteSearchAPI.java @@ -14,6 +14,9 @@ import com.dotcms.content.elasticsearch.business.*; import com.dotcms.content.elasticsearch.util.RestHighLevelClientProvider; import com.dotcms.content.index.IndexAPI; +import com.dotcms.content.index.IndexTag; +import com.dotcms.content.index.domain.Aggregation; +import com.dotcms.content.index.domain.DotSearchException; import com.dotcms.enterprise.LicenseUtil; import com.dotcms.enterprise.license.LicenseLevel; import com.dotcms.enterprise.priv.util.SearchSourceBuilderUtil; @@ -64,7 +67,6 @@ import org.elasticsearch.index.query.QueryBuilders; import org.elasticsearch.search.SearchHit; import org.elasticsearch.search.SearchHits; -import org.elasticsearch.search.aggregations.Aggregation; import org.elasticsearch.search.builder.SearchSourceBuilder; import org.elasticsearch.search.fetch.subphase.highlight.HighlightBuilder; import org.elasticsearch.search.fetch.subphase.highlight.HighlightField; @@ -88,7 +90,11 @@ public ESSiteSearchAPI(final IndexAPI indexApi, } public ESSiteSearchAPI() { - this(APILocator.getESIndexAPI(), new ESMappingAPIImpl(), APILocator.getIndiciesAPI()); + // Use the vendor-specific ESIndexAPI directly (NOT APILocator.getESIndexAPI(), which returns + // the phase-aware IndexAPIImpl router). The SiteSearchAPIImpl router is the single fan-out + // point for the ES → OS migration; routing index ops through the neutral router here as well + // would dual-write a second time and create duplicate OpenSearch indices. 
+ this(new ESIndexAPI(), new ESMappingAPIImpl(), APILocator.getIndiciesAPI()); } /** @@ -351,7 +357,7 @@ public void deactivateIndex(String indexName) throws DotDataException, IOExcepti } @Override - public synchronized boolean createSiteSearchIndex(String indexName, String alias, int shards) throws ElasticsearchException, IOException { + public synchronized boolean createSiteSearchIndex(String indexName, String alias, int shards) throws DotSearchException, IOException { if(indexName==null){ return false; } @@ -379,7 +385,7 @@ public synchronized boolean createSiteSearchIndex(String indexName, String alias } if(i++ > 300){ - throw new ElasticsearchException("index timed out creating"); + throw new DotSearchException("index timed out creating"); } } @@ -387,8 +393,12 @@ public synchronized boolean createSiteSearchIndex(String indexName, String alias indexApi.createAlias(indexName, alias); } - //put mappings - mappingAPI.putMapping(indexName, mapping); + // Put mappings on the ES index only. ESMappingAPIImpl.putMapping(String, String) is + // phase-dispatched and would fan out to OpenSearch, but SiteSearchAPIImpl is already the + // single fan-out point for site search (it invokes OSSiteSearchAPI separately, which owns + // its own untagged OS index + mapping). Fanning out here too would re-issue the mapping to + // a `.os`-tagged physical name that site-search OS indices never use → HTTP 404. Pin to ES. 
+ mappingAPI.putMapping(List.of(indexName), mapping, IndexTag.ES); return true; } @@ -634,7 +644,7 @@ public Map getAggregations ( String indexName, String query } if ( indexName == null || !IndexType.SITE_SEARCH.is(indexName) ) { - throw new ElasticsearchException( indexName + " is not a sitesearch index or alias" ); + throw new DotSearchException( indexName + " is not a sitesearch index or alias" ); } //https://github.com/elasticsearch/elasticsearch/issues/2980 @@ -648,10 +658,10 @@ public Map getAggregations ( String indexName, String query .timeout(TimeValue.timeValueMillis(INDEX_OPERATIONS_TIMEOUT_IN_MS))); final SearchResponse response = client.search(request, RequestOptions.DEFAULT); - return response.getAggregations().asMap(); + return Aggregation.from(response.getAggregations()); } catch ( ElasticsearchException | IOException e ) { Logger.error( this.getClass(), "Error getting aggregations for query.\n" + e.getMessage(), e ); - throw new ElasticsearchException( "Error getting aggregations for query.\n" + e.getMessage(), e ); + throw new DotSearchException( "Error getting aggregations for query.\n" + e.getMessage(), e ); } } @@ -669,7 +679,7 @@ public Map getFacets ( String indexName, String query ) thr } if ( indexName == null || !IndexType.SITE_SEARCH.is(indexName ) ) { - throw new ElasticsearchException( indexName + " is not a sitesearch index or alias" ); + throw new DotSearchException( indexName + " is not a sitesearch index or alias" ); } //https://github.com/elasticsearch/elasticsearch/issues/2980 @@ -683,10 +693,10 @@ public Map getFacets ( String indexName, String query ) thr .timeout(TimeValue.timeValueMillis(INDEX_OPERATIONS_TIMEOUT_IN_MS))); final SearchResponse response = client.search(request, RequestOptions.DEFAULT); - return response.getAggregations().asMap(); + return Aggregation.from(response.getAggregations()); } catch ( ElasticsearchException | IOException e ) { Logger.error( this.getClass(), "Error getting Facets for query.\n" + 
e.getMessage(), e ); - throw new ElasticsearchException( "Error getting Facets for query.\n" + e.getMessage(), e ); + throw new DotSearchException( "Error getting Facets for query.\n" + e.getMessage(), e ); } } diff --git a/dotCMS/src/enterprise/java/com/dotcms/enterprise/publishing/sitesearch/OSSiteSearchAPI.java b/dotCMS/src/enterprise/java/com/dotcms/enterprise/publishing/sitesearch/OSSiteSearchAPI.java new file mode 100644 index 000000000000..3196bb222d3f --- /dev/null +++ b/dotCMS/src/enterprise/java/com/dotcms/enterprise/publishing/sitesearch/OSSiteSearchAPI.java @@ -0,0 +1,848 @@ +/* +* +* Copyright (c) 2025 dotCMS LLC +* Use of this software is governed by the Business Source License included +* in the LICENSE file found at in the root directory of software. +* SPDX-License-Identifier: BUSL-1.1 +* +*/ + +package com.dotcms.enterprise.publishing.sitesearch; + +import com.dotcms.cdi.CDIUtils; +import com.dotcms.content.elasticsearch.business.ContentletIndexAPIImpl; +import com.dotcms.content.elasticsearch.business.ESMappingAPIImpl; +import com.dotcms.content.elasticsearch.business.IndexType; +import com.dotcms.content.index.IndexAPI; +import com.dotcms.content.index.IndexTag; +import com.dotcms.content.index.VersionedIndices; +import com.dotcms.content.index.VersionedIndicesAPI; +import com.dotcms.content.index.VersionedIndicesImpl; +import com.dotcms.content.index.domain.Aggregation; +import com.dotcms.content.index.domain.ContentSearchResponse; +import com.dotcms.content.index.domain.DotSearchException; +import com.dotcms.content.index.domain.SearchHit; +import com.dotcms.content.index.domain.SearchHits; +import com.dotcms.content.index.opensearch.OSClientProvider; +import com.dotcms.content.index.opensearch.OSIndexAPIImpl; +import com.dotcms.enterprise.LicenseUtil; +import com.dotcms.enterprise.license.LicenseLevel; +import com.dotcms.publishing.job.SiteSearchJobProxy; +import com.dotmarketing.business.APILocator; +import 
com.dotmarketing.exception.DotDataException; +import com.dotmarketing.exception.DotRuntimeException; +import com.dotmarketing.quartz.CronScheduledTask; +import com.dotmarketing.quartz.QuartzUtils; +import com.dotmarketing.quartz.ScheduledTask; +import com.dotmarketing.quartz.TaskRuntimeValues; +import com.dotmarketing.sitesearch.business.SiteSearchAPI; +import com.dotmarketing.util.Logger; +import com.dotmarketing.util.StringUtils; +import com.dotmarketing.util.UUIDGenerator; +import com.dotmarketing.util.UtilMethods; +import com.dotmarketing.util.json.JSONArray; +import com.dotmarketing.util.json.JSONException; +import com.dotmarketing.util.json.JSONObject; +import com.google.common.annotations.VisibleForTesting; +import io.vavr.control.Try; +import java.io.File; +import java.io.IOException; +import java.io.InputStream; +import java.net.URL; +import java.text.ParseException; +import java.text.SimpleDateFormat; +import java.time.Duration; +import java.time.Instant; +import java.util.ArrayList; +import java.util.Calendar; +import java.util.Collections; +import java.util.Date; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.Optional; +import java.util.stream.Collectors; +import javax.enterprise.context.ApplicationScoped; +import javax.enterprise.inject.Default; +import javax.inject.Inject; +import org.opensearch.client.json.JsonpDeserializer; +import org.opensearch.client.json.JsonpMapper; +import org.opensearch.client.opensearch.OpenSearchClient; +import org.opensearch.client.opensearch.core.GetResponse; +import org.opensearch.client.opensearch.core.SearchResponse; +import org.opensearch.client.opensearch.generic.Bodies; +import org.opensearch.client.opensearch.generic.Body; +import org.opensearch.client.opensearch.generic.Requests; +import org.opensearch.client.opensearch.generic.Response; +import org.quartz.SchedulerException; + +/** + * OpenSearch implementation of {@link SiteSearchAPI}. + * + *

Vendor-specific counterpart to {@link ESSiteSearchAPI}. The two implementations are kept + * functionally symmetric and are selected at runtime by the {@link SiteSearchAPIImpl} router based + * on the migration phase. This class confines every {@code org.opensearch.*} type to its private + * helpers — the {@link SiteSearchAPI} contract it implements is vendor-neutral.

+ * + *

Index source of truth

+ *

Where {@link ESSiteSearchAPI} reads the active site-search index name from the legacy + * {@code IndiciesAPI}, this class uses {@link VersionedIndicesAPI} — the canonical OpenSearch index + * registry — via the {@code siteSearch} slot of the default ({@link VersionedIndices#OPENSEARCH_3X}) + * versioned indices. Index lifecycle operations (create/list/delete/alias) are delegated to + * the OpenSearch {@link IndexAPI} provider ({@link OSIndexAPIImpl}) directly rather than the neutral + * router, because the {@link SiteSearchAPIImpl} router is already the single phase-aware fan-out point + * — routing through the neutral {@code IndexAPI} router here would dual-write a second time.

+ * + *

Index naming

+ *

Site-search index names are handled as plain logical names (e.g. {@code sitesearch_1718000000000}), + * exactly as in {@link ESSiteSearchAPI}: the cluster-id prefix is added only when a name reaches the + * OpenSearch client (via {@link IndexAPI#getNameWithClusterIDPrefix(String)}). The {@code .os} + * {@link com.dotcms.content.index.IndexTag} is intentionally not applied to site-search indices — + * production ES and OS run on separate clusters, and the site-search pointer lives in its own + * {@code siteSearch} slot, so there is no shared-name collision to disambiguate. + * TODO OS: revisit if single-cluster dual-write of site-search is ever required (then tag with + * {@code IndexTag.OS}).

+ * + * @author Fabrizio Araya + * @see ESSiteSearchAPI + * @see SiteSearchAPIImpl + * @see com.dotcms.content.index.opensearch.OSSearchAPIImpl + */ +@ApplicationScoped +@Default +public class OSSiteSearchAPI implements SiteSearchAPI { + + /** + * Response deserializer with {@code TDocument} bound to {@code Object} (JSON objects become + * {@code Map}). The query body is sent through the low-level (generic) client so nested + * sub-aggregations are preserved; the bare {@code SearchResponse._DESERIALIZER} has no document + * deserializer bound and would fail on a hit carrying a {@code _source}. Mirrors + * {@link com.dotcms.content.index.opensearch.OSSearchAPIImpl}. + */ + private static final JsonpDeserializer> SEARCH_RESPONSE_DESERIALIZER = + SearchResponse.createSearchResponseDeserializer(JsonpDeserializer.of(Object.class)); + + private final OSClientProvider clientProvider; + private final IndexAPI indexApi; + + /** CDI-managed constructor. */ + @Inject + public OSSiteSearchAPI() { + this(CDIUtils.getBeanThrows(OSClientProvider.class), + CDIUtils.getBeanThrows(OSIndexAPIImpl.class)); + } + + /** Package-private constructor for testing. 
*/ + @VisibleForTesting + OSSiteSearchAPI(final OSClientProvider clientProvider, + final IndexAPI indexApi) { + this.clientProvider = clientProvider; + this.indexApi = indexApi; + } + + // ========================================================================= + // Index listing + // ========================================================================= + + @Override + public List listIndices() { + if (LicenseUtil.getLevel() < LicenseLevel.STANDARD.level) { + return Collections.emptyList(); + } + final List indices = indexApi.listIndices().stream() + .filter(IndexType.SITE_SEARCH::is) + .collect(Collectors.toList()); + + Collections.sort(indices); + Collections.reverse(indices); + setDefaultToSpecificPosition(indices, 0); + return indices; + } + + /** + * Moves the active (default) site-search index to {@code indexPosition} of the list, mirroring + * {@link ESSiteSearchAPI} but resolving the default from {@link VersionedIndicesAPI}. + */ + private void setDefaultToSpecificPosition(final List list, final int indexPosition) { + if (list == null || list.size() <= 1) { + return; + } + final String defaultIndice = defaultSiteSearchIndex().orElse(null); + if (UtilMethods.isSet(defaultIndice) && !list.isEmpty()) { + final int index = list.indexOf(defaultIndice); + if (index < 0) { + Logger.warn(this.getClass(), String.format( + "The default site search '%s' index was not found in the list of indices.", + defaultIndice)); + } else { + list.remove(index); + list.add(indexPosition, defaultIndice); + } + } + } + + @Override + public List listClosedIndices() { + final List indices = new ArrayList<>(); + if (LicenseUtil.getLevel() < LicenseLevel.STANDARD.level) { + return indices; + } + for (final String indexName : indexApi.getClosedIndexes()) { + if (IndexType.SITE_SEARCH.is(indexName)) { + indices.add(indexName); + } + } + Collections.sort(indices); + Collections.reverse(indices); + return indices; + } + + // 
========================================================================= + // Search & aggregations + // ========================================================================= + + @Override + public SiteSearchResults search(final String query, final int start, final int rows) { + final SiteSearchResults results = new SiteSearchResults(); + if (query == null) { + results.setError("null query"); + return results; + } + if (LicenseUtil.getLevel() < LicenseLevel.STANDARD.level) { + return results; + } + try { + return search(defaultSiteSearchIndex().orElse(null), query, start, rows); + } catch (final Exception e) { + results.setError(e.getMessage()); + } + return results; + } + + @Override + public SiteSearchResults search(String indexName, String query, final int offset, final int limit) { + if (!UtilMethods.isSet(query)) { + query = "*"; + } + final SiteSearchResults results = new SiteSearchResults(); + + if (LicenseUtil.getLevel() < LicenseLevel.STANDARD.level) { + return results; + } + + final boolean isJson = StringUtils.isJson(query); + + //https://github.com/elasticsearch/elasticsearch/issues/2980 + if (query.contains("/")) { + query = query.replaceAll("/", "\\\\/"); + } + + results.setQuery(query); + results.setLimit(limit); + results.setOffset(offset); + + try { + if (indexName == null) { + indexName = defaultSiteSearchIndex().orElse(null); + } + if (!IndexType.SITE_SEARCH.is(indexName)) { + throw new DotSearchException(indexName + " is not a sitesearch index"); + } + results.setIndex(indexName); + + final JSONObject body; + if (!isJson) { + body = new JSONObject(); + body.put("query", new JSONObject().put("query_string", + new JSONObject().put("query", query).put("default_field", "*"))); + if (limit > 0) { + body.put("size", limit); + } + if (offset > 0) { + body.put("from", offset); + } + body.put("highlight", new JSONObject().put("fields", + new JSONObject().put("content", new JSONObject().put("fragment_size", 255)))); + } else { + body = new 
JSONObject(query); + } + + final ContentSearchResponse response = rawSearch(physicalName(indexName), body); + results.setTook(response.tookMillis() + "ms"); + if (!isJson) { + results.setQuery(body.toString()); + } + + final SearchHits hits = response.hits(); + results.setTotalResults(hits.getTotalHits().value()); + + float maxScore = 0f; + for (final SearchHit hit : hits) { + final SiteSearchResult ssr = new SiteSearchResult(new HashMap<>(hit.getSourceAsMap())); + ssr.setScore(hit.getScore()); + maxScore = Math.max(maxScore, hit.getScore()); + // TODO OS: the neutral SearchHit DTO does not carry per-field highlights yet. + // Site-search highlights are a best-effort extra (the ES path also swallows + // highlight failures); set empty until the neutral hit exposes highlight fragments. + ssr.setHighLight(new String[0]); + results.getResults().add(ssr); + } + results.setMaxScore(maxScore); + + } catch (final Exception e) { + Logger.error(OSSiteSearchAPI.class, e.getMessage(), e); + results.setError(e.getMessage()); + } + + return results; + } + + @Override + public Map getAggregations(String indexName, String query) + throws DotDataException { + indexName = resolveIndexOrAlias(indexName); + if (indexName == null || !IndexType.SITE_SEARCH.is(indexName)) { + throw new DotSearchException(indexName + " is not a sitesearch index or alias"); + } + + //https://github.com/elasticsearch/elasticsearch/issues/2980 + if (query.contains("/")) { + query = query.replaceAll("/", "\\\\\\\\/"); + } + + try { + final ContentSearchResponse response = rawSearch(physicalName(indexName), new JSONObject(query)); + return response.aggregationTree(); + } catch (final Exception e) { + Logger.error(this.getClass(), "Error getting aggregations for query.\n" + e.getMessage(), e); + throw new DotSearchException("Error getting aggregations for query.\n" + e.getMessage(), e); + } + } + + /** + * {@inheritDoc} + * + * @deprecated use {@link #getAggregations(String, String)} instead. 
+ */ + @Deprecated + @Override + public Map getFacets(String indexName, String query) throws DotDataException { + indexName = resolveIndexOrAlias(indexName); + if (indexName == null || !IndexType.SITE_SEARCH.is(indexName)) { + throw new DotSearchException(indexName + " is not a sitesearch index or alias"); + } + + //https://github.com/elasticsearch/elasticsearch/issues/2980 + if (query.contains("/")) { + query = query.replaceAll("/", "\\\\\\\\/"); + } + + try { + final ContentSearchResponse response = rawSearch(physicalName(indexName), new JSONObject(query)); + return response.aggregationTree(); + } catch (final Exception e) { + Logger.error(this.getClass(), "Error getting Facets for query.\n" + e.getMessage(), e); + throw new DotSearchException("Error getting Facets for query.\n" + e.getMessage(), e); + } + } + + // ========================================================================= + // Default index activation / inspection + // ========================================================================= + + @Override + public boolean isDefaultIndex(final String indexName) throws DotDataException { + return indexName != null && indexName.equals(defaultSiteSearchIndex().orElse(null)); + } + + @Override + public void activateIndex(final String indexName) throws DotDataException { + if (LicenseUtil.getLevel() < LicenseLevel.STANDARD.level) { + return; + } + if (!IndexType.SITE_SEARCH.is(indexName)) { + return; + } + final VersionedIndicesImpl.Builder builder = copyDefaultIndices(); + builder.siteSearch(indexName); + saveDefaultIndices(builder); + } + + @Override + public void deactivateIndex(final String indexName) throws DotDataException, IOException { + if (LicenseUtil.getLevel() < LicenseLevel.STANDARD.level) { + return; + } + if (!IndexType.SITE_SEARCH.is(indexName)) { + return; + } + // Rebuild the default indices without the site-search slot. 
saveIndices() does a + // delete-by-version then re-insert, so omitting the slot clears the pointer while preserving + // the content live/working rows. If site-search was the ONLY slot for this version, the + // rebuilt info would be empty (saveIndices rejects empty), so drop the version row instead. + final VersionedIndicesImpl rebuilt = copyDefaultIndicesExceptSiteSearch().build(); + final VersionedIndicesAPI api = APILocator.getVersionedIndicesAPI(); + if (rebuilt.hasAnyIndex()) { + api.saveIndices(rebuilt); + } else { + api.removeVersion(rebuilt.version()); + } + api.clearCache(); + } + + // ========================================================================= + // Index creation / mapping + // ========================================================================= + + @Override + public synchronized boolean createSiteSearchIndex(String indexName, final String alias, final int shards) + throws DotSearchException, IOException { + if (indexName == null) { + return false; + } + if (LicenseUtil.getLevel() < LicenseLevel.STANDARD.level) { + return false; + } + + indexName = indexName.toLowerCase(); + final ClassLoader classLoader = Thread.currentThread().getContextClassLoader(); + // OpenSearch-format settings: the legacy es-sitesearch-settings.json uses ES-only token + // filter syntax (e.g. edgeNGram / side) that the typed OpenSearch IndexSettings deserializer + // rejects. os-sitesearch-settings.json declares the same analyzers (standard_content, + // partial_content) in OpenSearch syntax. The mapping is vendor-neutral and is reused as-is. 
+ URL url = classLoader.getResource("os-sitesearch-settings.json"); + final String settings = new String(com.liferay.util.FileUtil.getBytes(new File(url.getPath()))); + url = classLoader.getResource("es-sitesearch-mapping.json"); + final String mapping = new String(com.liferay.util.FileUtil.getBytes(new File(url.getPath()))); + + try { + indexApi.createIndex(indexName, settings, shards); + } catch (final Exception e) { + throw new DotSearchException("Error creating OpenSearch site search index: " + e.getMessage(), e); + } + + if (UtilMethods.isSet(alias)) { + indexApi.createAlias(indexName, alias); + } + + putMapping(indexName, mapping); + + return true; + } + + /** + * Applies the mapping to the site-search index via a raw {@code PUT //_mapping}. + * + *

Done here rather than via {@code MappingOperationsOS} on purpose: that helper force-tags the + * physical name with {@code .os}, which would target a different index than the untagged one this + * class creates and queries (see the class "Index naming" note), leaving the real index on the + * dynamic default mapping (string fields become {@code text}, which then breaks keyword + * aggregations such as {@code mimeType}). Forwarding to the same untagged physical name used by + * {@code createIndex}/search/put keeps the mapping on the index that is actually hit.

+ */ + private void putMapping(final String indexName, final String mapping) throws DotSearchException { + final String endpoint = "/" + physicalName(indexName) + "/_mapping"; + try (final Response response = clientProvider.getClient().generic() + .execute(Requests.builder() + .method("PUT") + .endpoint(endpoint) + .body(Bodies.json(mapping)) + .build())) { + final int status = response.getStatus(); + if (status < 200 || status >= 300) { + throw new DotSearchException("Error applying mapping to OpenSearch site search index " + + indexName + " — HTTP " + status + " — " + + response.getBody().map(Body::bodyAsString).orElse("")); + } + } catch (final IOException e) { + throw new DotSearchException("Error applying mapping to OpenSearch site search index: " + + e.getMessage(), e); + } + } + + @Override + public synchronized boolean setAlias(String indexName, final String alias) { + if (LicenseUtil.getLevel() < LicenseLevel.STANDARD.level) { + return false; + } + if (UtilMethods.isNotSet(indexName) || UtilMethods.isNotSet(alias)) { + throw new IllegalArgumentException(String.format( + " either one or both params aren't set. index: `%s`, alias: `%s` ", indexName, alias)); + } + indexName = indexName.toLowerCase(); + indexApi.createAlias(indexName, alias); + return false; + } + + /** + * Mirrors {@link ESSiteSearchAPI#deleteOldSiteSearchIndices()} but resolves the active index from + * {@link VersionedIndicesAPI} and deletes through the OpenSearch {@link IndexAPI} provider. + */ + @Override + public void deleteOldSiteSearchIndices() { + final List indicesToRemove = new ArrayList<>(listIndices()); + + // Keep the default (active) site-search index. + defaultSiteSearchIndex().ifPresent(indicesToRemove::remove); + + // Keep any index that backs an alias. + final List indicesWithAlias = + new ArrayList<>(indexApi.getIndexAlias(indicesToRemove).keySet()); + indicesToRemove.removeAll(indicesWithAlias); + + // Keep indices created within the last 24 hours. 
+ final Date yesterday = Date.from(Instant.now().minus(Duration.ofDays(1))); + final long yesterdayTimestamp = + Long.parseLong(ContentletIndexAPIImpl.timestampFormatter.format(yesterday)); + + final List recent = new ArrayList<>(); + for (final String index : indicesToRemove) { + try { + final long indexTimestamp = Long.parseLong(index.split("_")[1]); + if (indexTimestamp >= yesterdayTimestamp) { + recent.add(index); + } + } catch (final RuntimeException e) { + Logger.warn(this.getClass(), + "Unable to parse timestamp from site search index '" + index + "': " + e.getMessage()); + } + } + indicesToRemove.removeAll(recent); + + if (!indicesToRemove.isEmpty()) { + Logger.info(this.getClass(), + "The following indices will be deleted: " + String.join(",", indicesToRemove)); + indexApi.deleteMultiple(indicesToRemove.toArray(new String[0])); + } + } + + // ========================================================================= + // Document operations + // ========================================================================= + + @Override + public void putToIndex(final String idx, final SiteSearchResult res, final String resultType) { + if (LicenseUtil.getLevel() < LicenseLevel.STANDARD.level) { + return; + } + try { + if (res.getContentLength() == 0) { + return; + } + if (res.getTitle() == null && res.getFileName() != null) { + res.setTitle(res.getFileName()); + } + + // Strip HTML out of text content. 
+ if (res.getContent() != null && UtilMethods.isSet(res.getMimeType()) + && res.getMimeType().contains("text/")) { + res.getMap().put("content_raw", res.getContent()); + res.setContent(res.getContent().replaceAll("\\<.*?\\>", "")); + } + + String desc = res.getDescription(); + if (!UtilMethods.isSet(res.getDescription()) && UtilMethods.isSet(res.getContent())) { + desc = UtilMethods.prettyShortenString(res.getContent(), 500); + } + res.setDescription(desc); + + if (res.getMap().containsKey("keywords") && res.getMap().containsKey("seokeywords")) { + res.setKeywords((String) res.getMap().get("seokeywords")); + } else { + res.setKeywords((String) res.getMap().get("keywords")); + } + + Logger.info(this.getClass(), + "writing from : " + idx + " type: " + resultType + " url:" + res.getUrl()); + final String json = new ESMappingAPIImpl().toJsonString(res.getMap()); + + final String endpoint = "/" + physicalName(idx) + "/_doc/" + res.getId(); + try (final Response response = clientProvider.getClient().generic() + .execute(Requests.builder() + .method("PUT") + .endpoint(endpoint) + .query(Map.of("refresh", "true")) + .body(Bodies.json(json)) + .build())) { + final int status = response.getStatus(); + if (status < 200 || status >= 300) { + Logger.error(this.getClass(), "putToIndex failed for doc " + res.getId() + + " — HTTP " + status); + } + } + } catch (final Exception e) { + Logger.error(OSSiteSearchAPI.class, e.getMessage(), e); + } + } + + @Override + public void putToIndex(final String idx, final List res, final String resultType) { + if (LicenseUtil.getLevel() < LicenseLevel.STANDARD.level) { + return; + } + for (final SiteSearchResult r : res) { + putToIndex(idx, r, resultType); + } + } + + @Override + @SuppressWarnings({"unchecked", "rawtypes"}) + public SiteSearchResult getFromIndex(final String index, final String id) { + if (LicenseUtil.getLevel() < LicenseLevel.STANDARD.level) { + return null; + } + try { + final String physical = physicalName(index); + final 
GetResponse response = clientProvider.getClient() + .get(g -> g.index(physical).id(id), Map.class); + if (response.found() && response.source() != null) { + final SiteSearchResult ssr = new SiteSearchResult(new HashMap<>(response.source())); + ssr.setScore(1); + return ssr; + } + } catch (final Exception e) { + Logger.error(OSSiteSearchAPI.class, e.getMessage(), e); + } + return null; + } + + @Override + public void deleteFromIndex(final String idx, final String docId) { + if (LicenseUtil.getLevel() < LicenseLevel.STANDARD.level) { + return; + } + try { + Logger.info(this.getClass(), "deleting from : " + idx + " url:" + docId); + final String endpoint = "/" + physicalName(idx) + "/_doc/" + docId; + try (final Response response = clientProvider.getClient().generic() + .execute(Requests.builder() + .method("DELETE") + .endpoint(endpoint) + .query(Map.of("refresh", "true")) + .build())) { + final int status = response.getStatus(); + // 404 is benign — the document was already absent. + if (status >= 400 && status != 404) { + Logger.error(this.getClass(), "deleteFromIndex failed for doc " + docId + + " — HTTP " + status); + } + } + } catch (final Exception e) { + Logger.error(OSSiteSearchAPI.class, e.getMessage(), e); + } + } + + // ========================================================================= + // Quartz task scheduling — vendor-independent (identical to ESSiteSearchAPI) + // ========================================================================= + + @Override + public List getTasks() throws SchedulerException { + if (LicenseUtil.getLevel() < LicenseLevel.STANDARD.level) { + return null; + } + return QuartzUtils.getScheduledTasks(ES_SITE_SEARCH_NAME); + } + + @Override + public ScheduledTask getTask(final String taskName) throws SchedulerException { + if (LicenseUtil.getLevel() < LicenseLevel.STANDARD.level) { + return null; + } + for (final ScheduledTask task : getTasks()) { + if (task.getJobName() != null && task.getJobName().equals(taskName)) { + 
return task; + } + } + return null; + } + + @Override + public void scheduleTask(final SiteSearchConfig config) + throws SchedulerException, ParseException, ClassNotFoundException { + if (LicenseUtil.getLevel() < LicenseLevel.STANDARD.level) { + return; + } + final String name = config.getJobName(); + final String cronString = config.getCronExpression(); + + if (config.getJobId() == null) { + config.setJobId(UUIDGenerator.generateUuid()); + } + + final ScheduledTask task = new CronScheduledTask(name, ES_SITE_SEARCH_NAME, "Site Search ", + SiteSearchJobProxy.class.getCanonicalName(), new Date(), null, 1, config, cronString); + task.setSequentialScheduled(true); + + QuartzUtils.scheduleTask(task); + } + + @Override + public void deleteTask(final String taskName) throws SchedulerException { + if (LicenseUtil.getLevel() < LicenseLevel.STANDARD.level) { + return; + } + final ScheduledTask t = getTask(taskName); + QuartzUtils.pauseJob(t.getJobName(), ES_SITE_SEARCH_NAME); + QuartzUtils.removeTaskRuntimeValues(t.getJobName(), ES_SITE_SEARCH_NAME); + QuartzUtils.removeJob(t.getJobName(), ES_SITE_SEARCH_NAME); + } + + @Override + public void pauseTask(final String taskName) throws SchedulerException { + if (LicenseUtil.getLevel() < LicenseLevel.STANDARD.level) { + return; + } + final ScheduledTask t = getTask(taskName); + QuartzUtils.pauseJob(t.getJobName(), ES_SITE_SEARCH_NAME); + } + + @Override + public SiteSearchPublishStatus getTaskProgress(final String taskName) throws SchedulerException { + if (LicenseUtil.getLevel() < LicenseLevel.STANDARD.level) { + return null; + } + final TaskRuntimeValues trv = QuartzUtils.getTaskRuntimeValues(taskName, ES_SITE_SEARCH_NAME); + if (!(trv instanceof SiteSearchPublishStatus)) { + QuartzUtils.setTaskRuntimeValues(taskName, ES_SITE_SEARCH_NAME, new SiteSearchPublishStatus()); + } + return (SiteSearchPublishStatus) QuartzUtils.getTaskRuntimeValues(taskName, ES_SITE_SEARCH_NAME); + } + + @Override + public boolean isTaskRunning(final 
String jobName) throws SchedulerException { + if (LicenseUtil.getLevel() < LicenseLevel.STANDARD.level) { + return false; + } + return QuartzUtils.isJobRunning(jobName, ES_SITE_SEARCH_NAME); + } + + @Override + public void executeTaskNow(final SiteSearchConfig config) + throws SchedulerException, ParseException, ClassNotFoundException { + if (LicenseUtil.getLevel() < LicenseLevel.STANDARD.level) { + return; + } + final Calendar cal = Calendar.getInstance(); + cal.add(Calendar.SECOND, 10); + final String cron = new SimpleDateFormat("ss mm H d M ? yyyy").format(cal.getTime()); + config.setCronExpression(cron); + scheduleTask(config); + } + + // ========================================================================= + // Private helpers + // ========================================================================= + + /** + * The physical index name to use against the OpenSearch client: the cluster-id prefix is applied, + * matching how {@link OSIndexAPIImpl} builds its requests. No {@code .os} tag is added (see the + * class-level "Index naming" note). + */ + private String physicalName(final String indexName) { + return indexApi.getNameWithClusterIDPrefix(indexName); + } + + /** + * Resolves a site-search index name or alias to the backing index name, mirroring the + * alias-fallback in {@link ESSiteSearchAPI#getAggregations(String, String)}. + */ + private String resolveIndexOrAlias(String indexName) throws DotDataException { + if (indexName == null) { + indexName = defaultSiteSearchIndex().orElse(null); + } + if (indexName != null && !indexApi.indexExists(indexName)) { + // try using it as an alias + indexName = indexApi.getAliasToIndexMap(listIndices()).get(indexName); + } + return indexName; + } + + /** + * The active site-search index name from the default OpenSearch versioned indices, as a logical + * (untagged) name. + * + *

{@link VersionedIndicesAPI} canonicalises stored names to the {@code .os}-tagged form, so the + * raw value carries the tag. Site-search indices are handled in logical space everywhere else + * (the physical OpenSearch index itself is created without the tag — see the class "Index naming" + * note), so the tag is stripped here to keep comparisons and list lookups consistent.

+ */ + private Optional defaultSiteSearchIndex() { + return loadDefaultIndices().flatMap(VersionedIndices::siteSearch).map(IndexTag::strip); + } + + private Optional loadDefaultIndices() { + return Try.of(() -> APILocator.getVersionedIndicesAPI().loadDefaultVersionedIndices()) + .getOrElse(Optional.empty()); + } + + /** Builder seeded with every present slot of the default versioned indices. */ + private VersionedIndicesImpl.Builder copyDefaultIndices() { + final VersionedIndicesImpl.Builder builder = VersionedIndicesImpl.builder(); + loadDefaultIndices().ifPresent(info -> { + builder.version(info.version()); + info.live().ifPresent(builder::live); + info.working().ifPresent(builder::working); + info.reindexLive().ifPresent(builder::reindexLive); + info.reindexWorking().ifPresent(builder::reindexWorking); + info.siteSearch().ifPresent(builder::siteSearch); + }); + return builder; + } + + /** Builder seeded with every present slot of the default versioned indices except site-search. */ + private VersionedIndicesImpl.Builder copyDefaultIndicesExceptSiteSearch() { + final VersionedIndicesImpl.Builder builder = VersionedIndicesImpl.builder(); + loadDefaultIndices().ifPresent(info -> { + builder.version(info.version()); + info.live().ifPresent(builder::live); + info.working().ifPresent(builder::working); + info.reindexLive().ifPresent(builder::reindexLive); + info.reindexWorking().ifPresent(builder::reindexWorking); + }); + return builder; + } + + private void saveDefaultIndices(final VersionedIndicesImpl.Builder builder) throws DotDataException { + final VersionedIndicesAPI api = APILocator.getVersionedIndicesAPI(); + api.saveIndices(builder.build()); + api.clearCache(); + } + + /** + * Executes a raw JSON search body against {@code physicalIndex} through the low-level (generic) + * client and maps the response to the neutral {@link ContentSearchResponse}. 
The body is forwarded + * verbatim (rather than round-tripped through the typed {@code SearchRequest} model) so nested + * sub-aggregations are preserved; mirrors + * {@link com.dotcms.content.index.opensearch.OSSearchAPIImpl}. + */ + private ContentSearchResponse rawSearch(final String physicalIndex, final JSONObject body) { + final OpenSearchClient client = clientProvider.getClient(); + final JsonpMapper mapper = client._transport().jsonpMapper(); + try (final Response response = client.generic().execute(Requests.builder() + .method("POST") + .endpoint("/" + physicalIndex + "/_search") + .query(Map.of("typed_keys", "true")) + .json(body.toString()) + .build())) { + + final int status = response.getStatus(); + final Body responseBody = response.getBody().orElseThrow(() -> new DotSearchException( + "OS site search returned an empty body (HTTP " + status + ")")); + + if (status < 200 || status >= 300) { + throw new DotSearchException( + "OS site search failed: HTTP " + status + " — " + responseBody.bodyAsString()); + } + + try (final InputStream is = responseBody.body(); + final jakarta.json.stream.JsonParser parser = mapper.jsonProvider().createParser(is)) { + final SearchResponse searchResponse = + SEARCH_RESPONSE_DESERIALIZER.deserialize(parser, mapper); + return ContentSearchResponse.from(searchResponse); + } + } catch (final IOException e) { + throw new DotSearchException("OS site search execution failed: " + e.getMessage(), e); + } + } +} diff --git a/dotCMS/src/enterprise/java/com/dotcms/enterprise/publishing/sitesearch/SiteSearchAPIImpl.java b/dotCMS/src/enterprise/java/com/dotcms/enterprise/publishing/sitesearch/SiteSearchAPIImpl.java new file mode 100644 index 000000000000..7893f2b48dad --- /dev/null +++ b/dotCMS/src/enterprise/java/com/dotcms/enterprise/publishing/sitesearch/SiteSearchAPIImpl.java @@ -0,0 +1,315 @@ +/* +* +* Copyright (c) 2025 dotCMS LLC +* Use of this software is governed by the Business Source License included +* in the LICENSE file 
found at in the root directory of software. +* SPDX-License-Identifier: BUSL-1.1 +* +*/ + +package com.dotcms.enterprise.publishing.sitesearch; + +import com.dotcms.cdi.CDIUtils; +import com.dotcms.content.index.PhaseRouter; +import com.dotcms.content.index.domain.Aggregation; +import com.dotcms.content.index.domain.DotSearchException; +import com.dotcms.content.model.annotation.IndexLibraryIndependent; +import com.dotcms.content.model.annotation.IndexRouter; +import com.dotcms.content.model.annotation.IndexRouter.IndexAccess; +import com.dotmarketing.exception.DotDataException; +import com.dotmarketing.quartz.ScheduledTask; +import com.dotmarketing.sitesearch.business.SiteSearchAPI; +import java.io.IOException; +import java.text.ParseException; +import java.util.ArrayList; +import java.util.HashMap; +import java.util.LinkedHashSet; +import java.util.List; +import java.util.Map; +import java.util.Set; +import org.quartz.SchedulerException; + +/** + * Phase-aware router implementation of {@link SiteSearchAPI}. + * + *

Single entry point for Site Search during the Elasticsearch → OpenSearch migration. It owns no + * business logic — every call is delegated to the active provider(s) chosen by {@link PhaseRouter} + * according to the migration phase, mirroring {@link com.dotcms.content.index.IndexAPIImpl}.

+ * + *
+ * Phase                     | Read provider | Write providers
+ * --------------------------|---------------|-----------------
+ * 0 — not started           | ES            | [ES]
+ * 1 — dual-write, ES reads  | ES            | [ES, OS]
+ * 2 — dual-write, OS reads  | OS            | [ES, OS]
+ * 3 — OS only               | OS            | [OS]
+ * 
+ * + *

Why this router is the single fan-out point

+ *

{@link ESSiteSearchAPI} and {@link OSSiteSearchAPI} each talk to their own vendor's index API + * directly ({@code ESIndexAPI} / {@code OSIndexAPIImpl}) rather than the neutral {@code IndexAPI} + * router. If they used the neutral router, a write here would fan out twice (once per provider, each + * of which would itself dual-write), creating duplicate indices. Routing happens in exactly one place: + * here.

+ * + *

Routing categories

+ *
    + *
  • Document/index reads ({@code search}, {@code getFromIndex}, + * {@code getAggregations}, {@code getFacets}, {@code isDefaultIndex}) → read provider.
  • + *
  • Document/index writes ({@code putToIndex}, {@code deleteFromIndex}, + * {@code createSiteSearchIndex}, {@code setAlias}, {@code activateIndex}, + * {@code deactivateIndex}, {@code deleteOldSiteSearchIndices}) → write fan-out.
  • + *
  • Aggregating reads ({@code listIndices}, {@code listClosedIndices}) → in + * dual-write phases the two providers each own a distinct physical index set, so results are + * merged (deduplicated) rather than selecting one provider.
  • + *
  • Quartz scheduling ({@code scheduleTask}, {@code deleteTask}, + * {@code pauseTask}, {@code executeTaskNow}, {@code getTasks}, {@code getTask}, + * {@code getTaskProgress}, {@code isTaskRunning}) → these touch the shared Quartz scheduler, + * NOT a search backend. They are routed to a single provider so a job is never scheduled twice. + * The job itself, when it runs, calls {@code putToIndex} through this router and therefore still + * dual-writes documents.
  • + *
+ * + * @author Fabrizio Araya + * @see PhaseRouter + * @see ESSiteSearchAPI + * @see OSSiteSearchAPI + */ +@IndexLibraryIndependent +@IndexRouter(access = {IndexAccess.READ, IndexAccess.WRITE}) +public class SiteSearchAPIImpl implements SiteSearchAPI { + + private final SiteSearchAPI esImpl; + private final SiteSearchAPI osImpl; + private final PhaseRouter router; + + public SiteSearchAPIImpl() { + this(new ESSiteSearchAPI(), CDIUtils.getBeanThrows(OSSiteSearchAPI.class)); + } + + /** + * Package-private constructor for testing. + */ + SiteSearchAPIImpl(final SiteSearchAPI esImpl, final SiteSearchAPI osImpl) { + this.esImpl = esImpl; + this.osImpl = osImpl; + this.router = new PhaseRouter<>(esImpl, osImpl); + } + + // ------------------------------------------------------------------------- + // Aggregating reads — merge both providers in dual-write phases + // ------------------------------------------------------------------------- + + @Override + public List listIndices() { + final List providers = router.writeProviders(); + if (providers.size() == 1) { + return providers.getFirst().listIndices(); + } + final Set merged = new LinkedHashSet<>(esImpl.listIndices()); + merged.addAll(osImpl.listIndices()); + return new ArrayList<>(merged); + } + + @Override + public List listClosedIndices() { + final List providers = router.writeProviders(); + if (providers.size() == 1) { + return providers.getFirst().listClosedIndices(); + } + final Set merged = new LinkedHashSet<>(esImpl.listClosedIndices()); + merged.addAll(osImpl.listClosedIndices()); + return new ArrayList<>(merged); + } + + // ------------------------------------------------------------------------- + // Reads — read provider + // ------------------------------------------------------------------------- + + @Override + public SiteSearchResults search(final String query, final int start, final int rows) { + return router.read(impl -> impl.search(query, start, rows)); + } + + @Override + public 
SiteSearchResults search(final String indexName, final String query, final int start, + final int rows) { + return router.read(impl -> impl.search(indexName, query, start, rows)); + } + + @Override + public SiteSearchResult getFromIndex(final String index, final String id) { + return router.read(impl -> impl.getFromIndex(index, id)); + } + + @Override + public boolean isDefaultIndex(final String indexName) throws DotDataException { + try { + return router.readChecked(impl -> impl.isDefaultIndex(indexName)); + } catch (DotDataException e) { + throw e; + } catch (Exception e) { + throw new DotDataException(e.getMessage(), e); + } + } + + @Override + public Map getAggregations(final String indexName, final String query) + throws DotDataException { + try { + return router.readChecked(impl -> impl.getAggregations(indexName, query)); + } catch (DotDataException e) { + throw e; + } catch (Exception e) { + throw new DotDataException(e.getMessage(), e); + } + } + + @Override + public Map getFacets(final String indexName, final String query) + throws DotDataException { + try { + return router.readChecked(impl -> impl.getFacets(indexName, query)); + } catch (DotDataException e) { + throw e; + } catch (Exception e) { + throw new DotDataException(e.getMessage(), e); + } + } + + // ------------------------------------------------------------------------- + // Writes — fan out to all active write providers + // ------------------------------------------------------------------------- + + @Override + public boolean createSiteSearchIndex(final String indexName, final String alias, final int shards) + throws DotSearchException, IOException { + try { + return router.writeReturningChecked( + impl -> impl.createSiteSearchIndex(indexName, alias, shards)); + } catch (DotSearchException | IOException e) { + throw e; + } catch (Exception e) { + throw new IOException(e.getMessage(), e); + } + } + + @Override + public boolean setAlias(final String indexName, final String alias) { + return 
router.writeBoolean(impl -> impl.setAlias(indexName, alias)); + } + + @Override + public void activateIndex(final String indexName) throws DotDataException { + try { + router.writeChecked(impl -> impl.activateIndex(indexName)); + } catch (DotDataException e) { + throw e; + } catch (Exception e) { + throw new DotDataException(e.getMessage(), e); + } + } + + @Override + public void deactivateIndex(final String indexName) throws DotDataException, IOException { + try { + router.writeChecked(impl -> impl.deactivateIndex(indexName)); + } catch (DotDataException | IOException e) { + throw e; + } catch (Exception e) { + throw new DotDataException(e.getMessage(), e); + } + } + + @Override + public void putToIndex(final String idx, final SiteSearchResult res, final String resultType) { + // Each provider gets its own copy: putToIndex mutates the result's backing map + // (e.g. SiteSearchResult.setKeywords rewrites the "keywords" entry String -> List), so a + // shared instance would let the first provider in the fan-out corrupt the input the next + // provider reads — producing a ClassCastException on the second leaf. The lambda is invoked + // once per provider, so copyOf(res) is evaluated fresh from the untouched original each time. + router.write(impl -> impl.putToIndex(idx, copyOf(res), resultType)); + } + + @Override + public void putToIndex(final String idx, final List res, final String resultType) { + // See single-result overload: copy per provider so the fan-out never shares mutable state. + router.write(impl -> impl.putToIndex(idx, copyOf(res), resultType)); + } + + /** + * Shallow-copies a {@link SiteSearchResult} so the fan-out can hand an independent instance to + * each write provider. {@code putToIndex} mutates the backing map in place (HTML stripping, + * description derivation, {@code keywords} String→List rewrite); copying the map prevents one + * provider's mutations from leaking into the next provider's input. 
A shallow map copy is + * sufficient because every mutation replaces a map entry rather than mutating a value object. + */ + private static SiteSearchResult copyOf(final SiteSearchResult res) { + return new SiteSearchResult(new HashMap<>(res.getMap())); + } + + /** Copies each element of a result batch — see {@link #copyOf(SiteSearchResult)}. */ + private static List copyOf(final List results) { + final List copies = new ArrayList<>(results.size()); + for (final SiteSearchResult r : results) { + copies.add(copyOf(r)); + } + return copies; + } + + @Override + public void deleteFromIndex(final String idx, final String docId) { + router.write(impl -> impl.deleteFromIndex(idx, docId)); + } + + @Override + public void deleteOldSiteSearchIndices() { + router.write(SiteSearchAPI::deleteOldSiteSearchIndices); + } + + // ------------------------------------------------------------------------- + // Quartz scheduling — single provider (shared scheduler; never fan out) + // ------------------------------------------------------------------------- + + @Override + public List getTasks() throws SchedulerException { + return router.readProvider().getTasks(); + } + + @Override + public ScheduledTask getTask(final String taskName) throws SchedulerException { + return router.readProvider().getTask(taskName); + } + + @Override + public void scheduleTask(final SiteSearchConfig config) + throws SchedulerException, ParseException, ClassNotFoundException { + router.readProvider().scheduleTask(config); + } + + @Override + public void deleteTask(final String taskName) throws SchedulerException { + router.readProvider().deleteTask(taskName); + } + + @Override + public void pauseTask(final String taskName) throws SchedulerException { + router.readProvider().pauseTask(taskName); + } + + @Override + public SiteSearchPublishStatus getTaskProgress(final String jobName) throws SchedulerException { + return router.readProvider().getTaskProgress(jobName); + } + + @Override + public boolean 
isTaskRunning(final String jobName) throws SchedulerException { + return router.readProvider().isTaskRunning(jobName); + } + + @Override + public void executeTaskNow(final SiteSearchConfig config) + throws SchedulerException, ParseException, ClassNotFoundException { + router.readProvider().executeTaskNow(config); + } +} diff --git a/dotCMS/src/main/java/com/dotcms/content/index/domain/Aggregation.java b/dotCMS/src/main/java/com/dotcms/content/index/domain/Aggregation.java index 48db102f1ed9..e94984f6ed3c 100644 --- a/dotCMS/src/main/java/com/dotcms/content/index/domain/Aggregation.java +++ b/dotCMS/src/main/java/com/dotcms/content/index/domain/Aggregation.java @@ -88,6 +88,12 @@ private static Aggregation fromSingle(final org.elasticsearch.search.aggregation builder.buckets(terms.getBuckets().stream() .map(AggregationBucket::from) .collect(Collectors.toList())); + } else if (esAgg instanceof org.elasticsearch.search.aggregations.bucket.histogram.Histogram) { + final org.elasticsearch.search.aggregations.bucket.histogram.Histogram histogram = + (org.elasticsearch.search.aggregations.bucket.histogram.Histogram) esAgg; + builder.buckets(histogram.getBuckets().stream() + .map(AggregationBucket::fromHistogram) + .collect(Collectors.toList())); } else if (esAgg instanceof org.elasticsearch.search.aggregations.metrics.TopHits) { final org.elasticsearch.search.aggregations.metrics.TopHits topHits = (org.elasticsearch.search.aggregations.metrics.TopHits) esAgg; diff --git a/dotCMS/src/main/java/com/dotcms/content/index/domain/AggregationBucket.java b/dotCMS/src/main/java/com/dotcms/content/index/domain/AggregationBucket.java index c8904dcc8c34..79929696d59d 100644 --- a/dotCMS/src/main/java/com/dotcms/content/index/domain/AggregationBucket.java +++ b/dotCMS/src/main/java/com/dotcms/content/index/domain/AggregationBucket.java @@ -92,6 +92,34 @@ public static AggregationBucket from( .build(); } + /** + * Creates a bucket from an Elasticsearch histogram bucket (date or 
numeric), including its + * sub-aggregations. The key is normalized to its numeric form so {@link #getKeyAsNumber()} + * returns the epoch-millis (date histogram) or the numeric interval (numeric histogram): + * a date-histogram key is a {@code java.time.ZonedDateTime} in ES 7.x, not a number, so it is + * converted to epoch-millis here rather than via {@code getKeyAsString()} (which yields a + * formatted date). + */ + public static AggregationBucket fromHistogram( + final org.elasticsearch.search.aggregations.bucket.histogram.Histogram.Bucket esBucket) { + return builder() + .key(histogramKey(esBucket.getKey())) + .docCount(esBucket.getDocCount()) + .subAggregations(Aggregation.from(esBucket.getAggregations())) + .build(); + } + + /** Normalizes a histogram bucket key to a numeric String ({@link #getKeyAsNumber()}-friendly). */ + private static String histogramKey(final Object key) { + if (key instanceof java.time.ZonedDateTime) { + return String.valueOf(((java.time.ZonedDateTime) key).toInstant().toEpochMilli()); + } + if (key instanceof Number) { + return String.valueOf(((Number) key).longValue()); + } + return String.valueOf(key); + } + // ------------------------------------------------------------------------- // OS factories // ------------------------------------------------------------------------- diff --git a/dotCMS/src/main/java/com/dotcms/content/index/domain/DotSearchException.java b/dotCMS/src/main/java/com/dotcms/content/index/domain/DotSearchException.java new file mode 100644 index 000000000000..6a45e5d0186b --- /dev/null +++ b/dotCMS/src/main/java/com/dotcms/content/index/domain/DotSearchException.java @@ -0,0 +1,32 @@ +package com.dotcms.content.index.domain; + +import com.dotmarketing.exception.DotRuntimeException; + +/** + * Vendor-neutral search exception for the index abstraction layer. + * + *

Replaces {@code org.elasticsearch.ElasticsearchException} on the public surface of the + * search/site-search APIs so that callers — and the interfaces themselves — no longer couple to + * Elasticsearch (or any other engine) types. It is the neutral failure signal raised by both the + * Elasticsearch and OpenSearch providers when a search or index operation cannot be completed.

+ * + *

It extends {@link DotRuntimeException} (and therefore is unchecked) to mirror the unchecked + * nature of {@code ElasticsearchException}: existing callers that never declared a {@code catch} + * for the vendor exception keep compiling unchanged.

+ */ +public class DotSearchException extends DotRuntimeException { + + private static final long serialVersionUID = 1L; + + public DotSearchException(final String message) { + super(message); + } + + public DotSearchException(final Throwable cause) { + super(cause); + } + + public DotSearchException(final String message, final Throwable cause) { + super(message, cause); + } +} \ No newline at end of file diff --git a/dotCMS/src/main/java/com/dotmarketing/business/APILocator.java b/dotCMS/src/main/java/com/dotmarketing/business/APILocator.java index f107e7a4f3f8..6d434689c27e 100644 --- a/dotCMS/src/main/java/com/dotmarketing/business/APILocator.java +++ b/dotCMS/src/main/java/com/dotmarketing/business/APILocator.java @@ -67,6 +67,7 @@ import com.dotcms.enterprise.linkchecker.LinkCheckerAPIImpl; import com.dotcms.enterprise.priv.ESSearchProxy; import com.dotcms.enterprise.publishing.sitesearch.ESSiteSearchAPI; +import com.dotcms.enterprise.publishing.sitesearch.SiteSearchAPIImpl; import com.dotcms.enterprise.rules.RulesAPI; import com.dotcms.experiments.business.ExperimentsAPI; import com.dotcms.experiments.business.ExperimentsAPIImpl; @@ -1483,7 +1484,7 @@ Object create() { case FORM_API: return new FormAPIImpl(); case MENULINK_API: return new MenuLinkAPIImpl(); case DASHBOARD_API: return new DashboardAPIImpl(); - case SITESEARCH_API: return new ESSiteSearchAPI(); + case SITESEARCH_API: return new SiteSearchAPIImpl(); case FILEASSET_API: return new FileAssetAPIImpl(); case VERSIONABLE_API: return new VersionableAPIImpl(); case WORKFLOW_API : return new WorkflowAPIImpl(); diff --git a/dotCMS/src/main/java/com/dotmarketing/sitesearch/business/SiteSearchAPI.java b/dotCMS/src/main/java/com/dotmarketing/sitesearch/business/SiteSearchAPI.java index ac2031f1ac73..7a13c33847b1 100644 --- a/dotCMS/src/main/java/com/dotmarketing/sitesearch/business/SiteSearchAPI.java +++ b/dotCMS/src/main/java/com/dotmarketing/sitesearch/business/SiteSearchAPI.java @@ -5,10 +5,10 @@ import 
java.util.List; import java.util.Map; -import org.elasticsearch.ElasticsearchException; -import org.elasticsearch.search.aggregations.Aggregation; import org.quartz.SchedulerException; +import com.dotcms.content.index.domain.Aggregation; +import com.dotcms.content.index.domain.DotSearchException; import com.dotcms.enterprise.publishing.sitesearch.SiteSearchConfig; import com.dotcms.enterprise.publishing.sitesearch.SiteSearchPublishStatus; import com.dotcms.enterprise.publishing.sitesearch.SiteSearchResult; @@ -36,7 +36,7 @@ public interface SiteSearchAPI { void deactivateIndex(String indexName) throws DotDataException, IOException; - boolean createSiteSearchIndex(String indexName, String alias, int shards) throws ElasticsearchException, IOException; + boolean createSiteSearchIndex(String indexName, String alias, int shards) throws DotSearchException, IOException; boolean setAlias(String indexName, final String alias); diff --git a/dotCMS/src/main/java/com/dotmarketing/sitesearch/viewtool/SiteSearchWebAPI.java b/dotCMS/src/main/java/com/dotmarketing/sitesearch/viewtool/SiteSearchWebAPI.java index de09cbcff072..ed3bf36bb8dd 100644 --- a/dotCMS/src/main/java/com/dotmarketing/sitesearch/viewtool/SiteSearchWebAPI.java +++ b/dotCMS/src/main/java/com/dotmarketing/sitesearch/viewtool/SiteSearchWebAPI.java @@ -1,6 +1,8 @@ package com.dotmarketing.sitesearch.viewtool; import com.dotcms.content.index.IndexAPI; +import com.dotcms.content.index.domain.Aggregation; +import com.dotcms.content.index.domain.AggregationBucket; import com.dotcms.enterprise.publishing.sitesearch.SiteSearchResults; import com.dotmarketing.beans.Host; import com.dotmarketing.business.APILocator; @@ -13,16 +15,11 @@ import com.dotmarketing.util.StringUtils; import org.apache.velocity.tools.view.context.ViewContext; import org.apache.velocity.tools.view.tools.ViewTool; -import org.elasticsearch.search.aggregations.Aggregation; -import 
org.elasticsearch.search.aggregations.bucket.histogram.InternalDateHistogram; -import org.elasticsearch.search.aggregations.bucket.terms.StringTerms; -import org.elasticsearch.search.aggregations.bucket.terms.StringTerms.Bucket; import javax.servlet.http.HttpServletRequest; import javax.servlet.http.HttpServletResponse; import java.io.IOException; import java.util.*; -import org.joda.time.DateTime; public class SiteSearchWebAPI implements ViewTool { @@ -173,15 +170,16 @@ public Map getFacets(final String indexName, final String query) for (String key : aggregations.keySet()) { final Aggregation aggregation = aggregations.get(key); + final String type = aggregation.getType(); - if (aggregation instanceof InternalDateHistogram) { + if (isHistogram(type)) { internalFacet = new InternalWrapperCountDateHistogramFacet(aggregation.getName(), - aggregation.getType(), ((InternalDateHistogram) aggregation).getBuckets()); - } else if (aggregation instanceof StringTerms) { + type, aggregation.getBuckets()); + } else if (!aggregation.getBuckets().isEmpty()) { internalFacet = new InternalWrapperStringTermsFacet(aggregation.getName(), - aggregation.getType(), ((StringTerms) aggregation).getBuckets()); + type, aggregation.getBuckets()); } else { - internalFacet = new Facet(aggregation.getName(), aggregation.getType()); + internalFacet = new Facet(aggregation.getName(), type); } internalFacets.put(key, internalFacet); } @@ -189,23 +187,32 @@ public Map getFacets(final String indexName, final String query) return internalFacets; } + /** + * A histogram aggregation (date or numeric) reports a vendor type containing + * {@code "histogram"} (e.g. {@code date_histogram}); its buckets carry numeric keys. + */ + private static boolean isHistogram(final String type) { + return type != null && type.contains("histogram"); + } + /** * Internal wrapper class for backwards compatibility with the new Elastic Search in Site * Search. 
* - * @deprecated use ES Aggregations instead + * @deprecated use the vendor-neutral {@link #getAggregations(String, String)} instead */ public class InternalWrapperCountDateHistogramFacet extends Facet { private final List entries; public InternalWrapperCountDateHistogramFacet(final String name, final String type, - List entries) { + List entries) { super(name, type); this.entries = new ArrayList<>(); - for (final InternalDateHistogram.Bucket entry : entries) { - this.entries.add(new CountEntry(((DateTime) entry.getKey()).getMillis(), - entry.getDocCount())); + for (final AggregationBucket entry : entries) { + final Number key = entry.getKeyAsNumber(); + final long time = key != null ? key.longValue() : 0L; + this.entries.add(new CountEntry(time, entry.getDocCount())); } } @@ -237,20 +244,20 @@ public long getCount() { * Internal wrapper class for backwards compatibility with the new Elastic Search in Site * Search. * - * @deprecated use ES Aggregations instead + * @deprecated use the vendor-neutral {@link #getAggregations(String, String)} instead */ public class InternalWrapperStringTermsFacet extends Facet { private List entries; - public InternalWrapperStringTermsFacet(final String name, final String type, final List entries) { + public InternalWrapperStringTermsFacet(final String name, final String type, final List entries) { super(name, type); this.entries = new ArrayList<>(); - for (final Bucket entry : entries) { + for (final AggregationBucket entry : entries) { this.entries - .add(new InternalTermEntry(entry.getKey().toString(), entry.getDocCount())); + .add(new InternalTermEntry(entry.getKey(), entry.getDocCount())); } } @@ -279,7 +286,7 @@ public long getCount() { } /** - * @deprecated use ES Aggregations instead + * @deprecated use the vendor-neutral {@link #getAggregations(String, String)} instead */ public class Facet { diff --git a/dotCMS/src/main/resources/os-sitesearch-settings.json b/dotCMS/src/main/resources/os-sitesearch-settings.json new file 
mode 100644 index 000000000000..168e3e0bcb1c --- /dev/null +++ b/dotCMS/src/main/resources/os-sitesearch-settings.json @@ -0,0 +1,39 @@ +{ + "analysis": { + "filter": { + "content_ngrams": { + "type": "edge_ngram", + "min_gram": 1, + "max_gram": 10 + }, + "content_stemmer": { + "type": "stemmer", + "name": "english" + } + }, + "analyzer": { + "standard_content": { + "type": "custom", + "tokenizer": "standard", + "filter": [ + "lowercase", + "asciifolding", + "content_stemmer" + ] + }, + "partial_content": { + "type": "custom", + "tokenizer": "standard", + "filter": [ + "lowercase", + "asciifolding", + "content_ngrams" + ] + }, + "comma_analyzer": { + "type": "pattern", + "pattern": "," + } + } + } +} diff --git a/dotcms-integration/src/test/java/com/dotcms/MainSuite1b.java b/dotcms-integration/src/test/java/com/dotcms/MainSuite1b.java index b1e5bf853a22..fa9b83f71785 100644 --- a/dotcms-integration/src/test/java/com/dotcms/MainSuite1b.java +++ b/dotcms-integration/src/test/java/com/dotcms/MainSuite1b.java @@ -52,6 +52,7 @@ com.dotcms.rendering.velocity.viewtools.content.ContentMapTest.class, com.dotcms.rendering.velocity.viewtools.content.ContentToolTest.class, com.dotcms.rendering.velocity.viewtools.ContentSearchToolTest.class, + com.dotmarketing.sitesearch.viewtool.SiteSearchWebAPITest.class, com.dotcms.rendering.velocity.viewtools.WorkflowToolTest.class, com.dotcms.rendering.velocity.viewtools.WebsiteToolTest.class, com.dotcms.rendering.velocity.viewtools.LanguageWebAPITest.class, diff --git a/dotcms-integration/src/test/java/com/dotcms/OpenSearchUpgradeSuite.java b/dotcms-integration/src/test/java/com/dotcms/OpenSearchUpgradeSuite.java index 722e82865a7c..ce000e4e7f0f 100644 --- a/dotcms-integration/src/test/java/com/dotcms/OpenSearchUpgradeSuite.java +++ b/dotcms-integration/src/test/java/com/dotcms/OpenSearchUpgradeSuite.java @@ -12,6 +12,8 @@ import com.dotcms.content.index.opensearch.OSClientConfigTest; import 
com.dotcms.content.index.opensearch.OSClientProviderIntegrationTest; import com.dotcms.content.index.opensearch.OSSearchAPIImplIntegrationTest; +import com.dotcms.content.index.opensearch.OSSiteSearchAPIIntegrationTest; +import com.dotcms.enterprise.publishing.sitesearch.SiteSearchDualWriteRouterIT; import com.dotcms.junit.MainBaseSuite; import org.junit.runner.RunWith; import org.junit.runners.Suite.SuiteClasses; @@ -46,7 +48,9 @@ OSClientConfigTest.class, ContentletIndexAPIImplMigrationIntegrationTest.class, ContentletIndexAPIImplPhaseSwitchIntegrationTest.class, - OSSearchAPIImplIntegrationTest.class + OSSearchAPIImplIntegrationTest.class, + OSSiteSearchAPIIntegrationTest.class, + SiteSearchDualWriteRouterIT.class }) public class OpenSearchUpgradeSuite { } \ No newline at end of file diff --git a/dotcms-integration/src/test/java/com/dotcms/content/index/opensearch/OSSiteSearchAPIIntegrationTest.java b/dotcms-integration/src/test/java/com/dotcms/content/index/opensearch/OSSiteSearchAPIIntegrationTest.java new file mode 100644 index 000000000000..631212d287a6 --- /dev/null +++ b/dotcms-integration/src/test/java/com/dotcms/content/index/opensearch/OSSiteSearchAPIIntegrationTest.java @@ -0,0 +1,300 @@ +package com.dotcms.content.index.opensearch; + +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertFalse; +import static org.junit.Assert.assertNotNull; +import static org.junit.Assert.assertNull; +import static org.junit.Assert.assertTrue; + +import com.dotcms.DataProviderWeldRunner; +import com.dotcms.IntegrationTestBase; +import com.dotcms.content.index.domain.Aggregation; +import com.dotcms.enterprise.publishing.sitesearch.OSSiteSearchAPI; +import com.dotcms.enterprise.publishing.sitesearch.SiteSearchResult; +import com.dotcms.enterprise.publishing.sitesearch.SiteSearchResults; +import com.dotcms.LicenseTestUtil; +import com.dotcms.util.IntegrationTestInitService; +import com.dotmarketing.business.APILocator; +import 
com.dotmarketing.common.db.DotConnect; +import com.dotmarketing.util.Logger; +import com.dotmarketing.util.json.JSONObject; +import java.util.List; +import java.util.Map; +import java.util.UUID; +import javax.enterprise.context.ApplicationScoped; +import javax.inject.Inject; +import org.junit.After; +import org.junit.Before; +import org.junit.BeforeClass; +import org.junit.Test; +import org.junit.runner.RunWith; + +/** + * Integration tests for {@link OSSiteSearchAPI} exercised against a live OpenSearch 3.x container. + * + *

Validates the OpenSearch implementation of the Site Search API in isolation: index lifecycle + * (create / list / delete), the document round-trip ({@code putToIndex} → {@code getFromIndex} → + * {@code search} → {@code deleteFromIndex}), aggregations, and default-index activation through + * {@link com.dotcms.content.index.VersionedIndicesAPI}.

+ * + *

The {@code @Inject}-ed {@link OSSiteSearchAPI} resolves its OpenSearch client through + * {@link OSClientProvider}; the {@code @Alternative} {@link OSTestClientProvider} on the test + * classpath points it at the {@code opensearch-upgrade} container ({@code http://localhost:9201}). + * Index names are scoped with a per-run suffix so concurrent runs never collide; the {@code .os} + * tag is intentionally not used for site search (see {@link OSSiteSearchAPI}).

+ * + *

<p>Registered in {@link com.dotcms.OpenSearchUpgradeSuite}. Run with:</p> + * <pre>
+ *   ./mvnw verify -pl :dotcms-integration \
+ *       -Dcoreit.test.skip=false \
+ *       -Dopensearch.upgrade.test=true
+ * </pre>
+ *

+ * + * @author Fabrizio Araya + */ +@ApplicationScoped +@RunWith(DataProviderWeldRunner.class) +public class OSSiteSearchAPIIntegrationTest extends IntegrationTestBase { + + private static final String RUN_ID = + UUID.randomUUID().toString().replace("-", "").substring(0, 8); + + /** Numeric suffix so names match the {@code sitesearch_} convention. */ + private static final String SUFFIX = String.valueOf(Math.abs((long) RUN_ID.hashCode())); + + private static final String IDX_ONE = "sitesearch_" + SUFFIX; + private static final String IDX_TWO = "sitesearch_" + (Long.parseLong(SUFFIX) + 1); + + private static final String DOC_ID = "os-ss-it-" + RUN_ID; + + @Inject + private OSSiteSearchAPI osSiteSearchAPI; + + @Inject + private OSIndexAPIImpl osIndexAPI; + + // ======================================================================= + // Lifecycle + // ======================================================================= + + @BeforeClass + public static void prepare() throws Exception { + IntegrationTestInitService.getInstance().init(); + LicenseTestUtil.getLicense(); + } + + @Before + public void setUp() { + cleanupTestData(); + } + + @After + public void tearDown() { + cleanupTestData(); + } + + // ======================================================================= + // Section 1 — Core index lifecycle + // ======================================================================= + + /** + * Given scenario: a fresh site-search index name that does not yet exist in OpenSearch. + * Expected: createSiteSearchIndex creates it, indexExists reports it, and it shows up in + * listIndices. 
+ */ + @Test + public void test_createSiteSearchIndex_shouldExistAndBeListed() throws Exception { + assertFalse("Pre-condition: index must not exist yet", osIndexAPI.indexExists(IDX_ONE)); + + final boolean created = osSiteSearchAPI.createSiteSearchIndex(IDX_ONE, null, 1); + + assertTrue("createSiteSearchIndex must return true", created); + assertTrue("Index must exist in OpenSearch after creation", osIndexAPI.indexExists(IDX_ONE)); + assertTrue("Index must be returned by listIndices", + osSiteSearchAPI.listIndices().contains(IDX_ONE)); + + Logger.info(this, "✅ test_createSiteSearchIndex_shouldExistAndBeListed passed – index: " + IDX_ONE); + } + + /** + * Given scenario: an existing site-search index. + * Expected: deleting it through the OpenSearch index API removes it from the cluster. + */ + @Test + public void test_deleteSiteSearchIndex_shouldRemoveIt() throws Exception { + osSiteSearchAPI.createSiteSearchIndex(IDX_ONE, null, 1); + assertTrue("Pre-condition: index must exist", osIndexAPI.indexExists(IDX_ONE)); + + osIndexAPI.delete(IDX_ONE); + + assertFalse("Index must be gone after deletion", osIndexAPI.indexExists(IDX_ONE)); + Logger.info(this, "✅ test_deleteSiteSearchIndex_shouldRemoveIt passed"); + } + + // ======================================================================= + // Section 2 — Document round-trip (put / get / search / delete) + // ======================================================================= + + /** + * Given scenario: an empty site-search index. + * Expected: a document put to the index is retrievable by id, discoverable by search, and gone + * after deleteFromIndex. 
+ */ + @Test + public void test_putGetSearchDelete_documentRoundTrip() throws Exception { + osSiteSearchAPI.createSiteSearchIndex(IDX_ONE, null, 1); + assertNull("Pre-condition: document must not exist yet", + osSiteSearchAPI.getFromIndex(IDX_ONE, DOC_ID)); + + final SiteSearchResult doc = new SiteSearchResult(); + doc.setId(DOC_ID); + doc.setUrl("/os-site-search-it/" + RUN_ID); + doc.setTitle("OpenSearch Site Search IT " + RUN_ID); + doc.setMimeType("text/html"); + doc.setContent("dotcms opensearch site search integration roundtrip " + RUN_ID); + doc.setContentLength(doc.getContent().length()); + + osSiteSearchAPI.putToIndex(IDX_ONE, doc, "content"); + + final SiteSearchResult fetched = osSiteSearchAPI.getFromIndex(IDX_ONE, DOC_ID); + assertNotNull("Document must be retrievable after put", fetched); + assertEquals("Fetched document id must match", DOC_ID, fetched.getId()); + + final SiteSearchResults results = osSiteSearchAPI.search(IDX_ONE, "roundtrip", 0, 10); + assertNull("Search must not return an error: " + results.getError(), results.getError()); + assertTrue("Search must find the indexed document", results.getTotalResults() >= 1); + + osSiteSearchAPI.deleteFromIndex(IDX_ONE, DOC_ID); + assertNull("Document must be gone after deleteFromIndex", + osSiteSearchAPI.getFromIndex(IDX_ONE, DOC_ID)); + + Logger.info(this, "✅ test_putGetSearchDelete_documentRoundTrip passed – hits: " + + results.getTotalResults()); + } + + /** + * Given scenario: an index holding a few documents that share a common term. + * Expected: a terms aggregation query returns a non-null aggregation tree keyed by the + * aggregation name. 
+ */ + @Test + public void test_getAggregations_shouldReturnBuckets() throws Exception { + osSiteSearchAPI.createSiteSearchIndex(IDX_ONE, null, 1); + + for (int i = 0; i < 3; i++) { + final SiteSearchResult doc = new SiteSearchResult(); + doc.setId(DOC_ID + "-" + i); + doc.setUrl("/agg/" + RUN_ID + "/" + i); + doc.setTitle("Aggregation doc " + i); + doc.setMimeType("text/html"); + doc.setContent("aggregation bucket sample " + RUN_ID); + doc.setContentLength(doc.getContent().length()); + osSiteSearchAPI.putToIndex(IDX_ONE, doc, "content"); + } + + final String aggQuery = new JSONObject() + .put("size", 0) + .put("aggs", new JSONObject().put("by_mime", + new JSONObject().put("terms", + new JSONObject().put("field", "mimeType")))).toString(); + + final Map<String, Aggregation> aggregations = + osSiteSearchAPI.getAggregations(IDX_ONE, aggQuery); + + assertNotNull("Aggregations map must not be null", aggregations); + assertTrue("Aggregation 'by_mime' must be present", aggregations.containsKey("by_mime")); + + Logger.info(this, "✅ test_getAggregations_shouldReturnBuckets passed – keys: " + + aggregations.keySet()); + } + + // ======================================================================= + // Section 3 — Default index activation (VersionedIndicesAPI) + // ======================================================================= + + /** + * Given scenario: a created site-search index that is not yet the default. + * Expected: activateIndex makes isDefaultIndex true and orders it first in listIndices; + * deactivateIndex clears the default. 
+ */ + @Test + public void test_activateDeactivate_shouldToggleDefault() throws Exception { + osSiteSearchAPI.createSiteSearchIndex(IDX_ONE, null, 1); + assertFalse("Pre-condition: index must not be default yet", + osSiteSearchAPI.isDefaultIndex(IDX_ONE)); + + osSiteSearchAPI.activateIndex(IDX_ONE); + assertTrue("Index must be the default after activation", + osSiteSearchAPI.isDefaultIndex(IDX_ONE)); + + osSiteSearchAPI.deactivateIndex(IDX_ONE); + assertFalse("Index must no longer be the default after deactivation", + osSiteSearchAPI.isDefaultIndex(IDX_ONE)); + + Logger.info(this, "✅ test_activateDeactivate_shouldToggleDefault passed"); + } + + /** + * Given scenario: two created site-search indices with the second activated as default. + * Expected: listIndices returns both and places the active (default) index first. + */ + @Test + public void test_listIndices_shouldPlaceDefaultFirst() throws Exception { + osSiteSearchAPI.createSiteSearchIndex(IDX_ONE, null, 1); + osSiteSearchAPI.createSiteSearchIndex(IDX_TWO, null, 1); + + osSiteSearchAPI.activateIndex(IDX_TWO); + + final List<String> indices = osSiteSearchAPI.listIndices(); + assertTrue("Both indices must be listed", + indices.contains(IDX_ONE) && indices.contains(IDX_TWO)); + assertEquals("The default index must be first", IDX_TWO, indices.get(0)); + + Logger.info(this, "✅ test_listIndices_shouldPlaceDefaultFirst passed – order: " + indices); + } + + // ======================================================================= + // Section 4 — Additional interface methods + // ======================================================================= + + /** + * Given scenario: no closed site-search indices for this run. + * Expected: listClosedIndices returns a non-null list without raising. 
+ */ + @Test + public void test_listClosedIndices_shouldNotFail() { + final List closed = osSiteSearchAPI.listClosedIndices(); + assertNotNull("listClosedIndices must never return null", closed); + Logger.info(this, "✅ test_listClosedIndices_shouldNotFail passed – count: " + closed.size()); + } + + // ======================================================================= + // Cleanup helpers + // ======================================================================= + + private synchronized void cleanupTestData() { + for (final String name : List.of(IDX_ONE, IDX_TWO)) { + try { + if (osIndexAPI.indexExists(name)) { + osIndexAPI.delete(name); + } + } catch (final Exception e) { + Logger.warn(this, "Cleanup: error removing OS index '" + name + "': " + e.getMessage()); + } + } + cleanupVersionedRows(); + } + + private void cleanupVersionedRows() { + try { + new DotConnect() + .setSQL("DELETE FROM indicies WHERE index_name LIKE ?") + .addParam("%" + SUFFIX + "%") + .loadResult(); + APILocator.getVersionedIndicesAPI().clearCache(); + } catch (final Exception e) { + Logger.warn(this, "Cleanup: error removing versioned DB rows: " + e.getMessage()); + } + } +} diff --git a/dotcms-integration/src/test/java/com/dotcms/enterprise/publishing/sitesearch/SiteSearchDualWriteRouterIT.java b/dotcms-integration/src/test/java/com/dotcms/enterprise/publishing/sitesearch/SiteSearchDualWriteRouterIT.java new file mode 100644 index 000000000000..a400ce4aae36 --- /dev/null +++ b/dotcms-integration/src/test/java/com/dotcms/enterprise/publishing/sitesearch/SiteSearchDualWriteRouterIT.java @@ -0,0 +1,246 @@ +package com.dotcms.enterprise.publishing.sitesearch; + +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertNotNull; +import static org.junit.Assert.assertNull; +import static org.junit.Assert.assertTrue; +import static org.junit.Assume.assumeFalse; + +import com.dotcms.DataProviderWeldRunner; +import com.dotcms.IntegrationTestBase; +import 
com.dotcms.LicenseTestUtil; +import com.dotcms.content.elasticsearch.business.ESIndexAPI; +import com.dotcms.content.index.IndexAPIImpl; +import com.dotcms.content.index.IndexConfigHelper; +import com.dotcms.content.index.opensearch.OSIndexAPIImpl; +import com.dotcms.util.IntegrationTestInitService; +import com.dotmarketing.business.APILocator; +import com.dotmarketing.common.db.DotConnect; +import com.dotmarketing.sitesearch.business.SiteSearchAPI; +import com.dotmarketing.util.Config; +import com.dotmarketing.util.Logger; +import java.util.ArrayList; +import java.util.List; +import java.util.UUID; +import javax.enterprise.context.ApplicationScoped; +import javax.inject.Inject; +import org.junit.After; +import org.junit.Before; +import org.junit.BeforeClass; +import org.junit.Test; +import org.junit.runner.RunWith; + +/** + * Integration tests that exercise Site Search through the phase-aware {@link SiteSearchAPIImpl} + * router in a dual-write phase, where every write fans out to both the + * Elasticsearch ({@link ESSiteSearchAPI}) and OpenSearch ({@link OSSiteSearchAPI}) leaves. + * + *

These tests guard two regressions that only reproduce through the router fan-out — the + * isolated {@link com.dotcms.content.index.opensearch.OSSiteSearchAPIIntegrationTest} (which calls + * the OS leaf directly) cannot catch them:

+ * + *
<ol> + *
<li>Shared mutable result across the fan-out. {@code putToIndex} mutates the + * {@link SiteSearchResult} map in place — notably {@link SiteSearchResult#setKeywords(String)} + * rewrites the {@code keywords} entry from a {@code String} to a {@code List}. With a single + * shared instance, the first leaf (ES) corrupted the input the second leaf (OS) then read, + * producing {@code ClassCastException: EmptyList cannot be cast to String} on the OS write — + * silently dropping every document from OpenSearch. The router now hands each + * provider its own copy. This test asserts the document actually lands in OpenSearch.</li> + *
<li>Mapping fan-out leak. {@code createSiteSearchIndex} on the ES leaf applied + * its mapping through the phase-dispatched {@code ESMappingAPIImpl.putMapping}, which fanned + * out a second time to OpenSearch using a {@code .os}-tagged physical name that site-search OS + * indices never use → HTTP 404. The create path is now ES-pinned; this test asserts a + * router-driven create yields a fully functional, queryable OS index.</li> + *
</ol>
+ * + *

<p>Runs only when ES and OS are separate clusters (dual-write requires two endpoints); skipped + * via {@link org.junit.Assume#assumeFalse} on the single-cluster {@code opensearch-upgrade} + * profile. Registered in {@link com.dotcms.OpenSearchUpgradeSuite}. Run with:</p> + * <pre>
+ *   ./mvnw verify -pl :dotcms-integration \
+ *       -Dcoreit.test.skip=false \
+ *       -Dopensearch.upgrade.test=true
+ * </pre>
+ * + * @author Fabrizio Araya + */ +@ApplicationScoped +@RunWith(DataProviderWeldRunner.class) +public class SiteSearchDualWriteRouterIT extends IntegrationTestBase { + + /** Phase 1 — dual-write, ES reads. Writes fan out to [ES, OS]; reads come from ES. */ + private static final int PHASE_DUAL_WRITE_ES_READS = 1; + + private static final String RUN_ID = + UUID.randomUUID().toString().replace("-", "").substring(0, 8); + + /** Numeric suffix so the name matches the {@code sitesearch_} convention. */ + private static final String SUFFIX = String.valueOf(Math.abs((long) RUN_ID.hashCode())); + + private static final String IDX = "sitesearch_" + SUFFIX; + private static final String DOC_ID = "ss-dualwrite-it-" + RUN_ID; + + @Inject + private OSSiteSearchAPI osSiteSearchAPI; + + @Inject + private OSIndexAPIImpl osIndexAPI; + + /** The phase-aware fan-out router under test. */ + private SiteSearchAPI router; + + // ======================================================================= + // Lifecycle + // ======================================================================= + + @BeforeClass + public static void prepare() throws Exception { + IntegrationTestInitService.getInstance().init(); + LicenseTestUtil.getLicense(); + } + + @Before + public void setUp() { + // Dual-write fans out to both clusters; a single-cluster profile would collide on the + // shared untagged site-search name (and cannot host both leaves), so skip there. + assumeFalse("Requires separate ES and OS clusters for dual-write", esSameAsOs()); + router = APILocator.getSiteSearchAPI(); + cleanupTestData(); + setPhase(PHASE_DUAL_WRITE_ES_READS); + } + + @After + public void tearDown() { + setPhase(null); + cleanupTestData(); + } + + // ======================================================================= + // Tests + // ======================================================================= + + /** + * Given scenario: Phase 1 (dual-write). 
An index and a single document with {@code keywords} + * set are written through the router, fanning out to ES then OS on the same result instance. + * Expected: the document reaches OpenSearch (no {@code ClassCastException} on the OS leaf) and + * is searchable through the router's ES read path — proving the dual-write completed on both + * backends. {@code keywords} round-trips as a {@code List}. + */ + @Test + public void test_dualWritePutToIndex_documentReachesBothBackends() throws Exception { + router.createSiteSearchIndex(IDX, null, 1); + + final SiteSearchResult doc = new SiteSearchResult(); + doc.setId(DOC_ID); + doc.setUrl("/ss-dualwrite-it/" + RUN_ID); + doc.setTitle("Dual-write Site Search IT " + RUN_ID); + doc.setMimeType("text/html"); + doc.setContent("dotcms dual write roundtrip " + RUN_ID); + doc.setContentLength(doc.getContent().length()); + // The exact Bug 1 trigger: keywords enters the map as a raw String. The first leaf in the + // fan-out rewrites it to a List; the second leaf must not see that mutation. + doc.getMap().put("keywords", "alpha, beta"); + + router.putToIndex(IDX, doc, "content"); + + // Bug 1 — OpenSearch must have received the document (unpatched: ClassCastException → null). + final SiteSearchResult fromOs = osSiteSearchAPI.getFromIndex(IDX, DOC_ID); + assertNotNull("Document must be retrievable from OpenSearch after dual-write", fromOs); + assertEquals("Document id must match in OpenSearch", DOC_ID, fromOs.getId()); + assertEquals("keywords must round-trip as a trimmed list", + List.of("alpha", "beta"), fromOs.getKeywords()); + + // The dual-write also reached ES: in Phase 1 the router reads from ES. 
+ final SiteSearchResults esRead = router.search(IDX, "roundtrip", 0, 10); + assertNull("ES read must not error: " + esRead.getError(), esRead.getError()); + assertTrue("Document must be searchable via the router's ES read path", + esRead.getTotalResults() >= 1); + + Logger.info(this, "✅ test_dualWritePutToIndex_documentReachesBothBackends passed"); + } + + /** + * Given scenario: Phase 1 (dual-write). A batch of documents is written through the + * {@code putToIndex(String, List, String)} router overload. This exercises the list fan-out + * path, where each provider must receive its own copy of every result. + * Expected: every document lands in OpenSearch. + */ + @Test + public void test_dualWriteBatchPutToIndex_allDocumentsReachOpenSearch() throws Exception { + router.createSiteSearchIndex(IDX, null, 1); + + final List docs = new ArrayList<>(); + for (int i = 0; i < 3; i++) { + final SiteSearchResult doc = new SiteSearchResult(); + doc.setId(DOC_ID + "-" + i); + doc.setUrl("/ss-dualwrite-batch/" + RUN_ID + "/" + i); + doc.setTitle("Batch doc " + i); + doc.setMimeType("text/html"); + doc.setContent("dotcms dual write batch sample " + RUN_ID); + doc.setContentLength(doc.getContent().length()); + doc.getMap().put("keywords", "kw" + i + ", shared"); + docs.add(doc); + } + + router.putToIndex(IDX, docs, "content"); + + for (int i = 0; i < 3; i++) { + final String id = DOC_ID + "-" + i; + assertNotNull("Batch document '" + id + "' must reach OpenSearch", + osSiteSearchAPI.getFromIndex(IDX, id)); + } + + Logger.info(this, "✅ test_dualWriteBatchPutToIndex_allDocumentsReachOpenSearch passed"); + } + + // ======================================================================= + // Helpers + // ======================================================================= + + /** + * True when the ES and OS clients are configured against the same cluster endpoint (the + * single-cluster {@code opensearch-upgrade} profile). Mirrors the gate used by the core + * migration ITs. 
+ */ + private static boolean esSameAsOs() { + final String esEndpoint = Config.getStringProperty("DOT_ES_ENDPOINTS", + "http://localhost:9207"); + final String osEndpoint = Config.getStringProperty("OS_ENDPOINTS", + "http://localhost:9201"); + return esEndpoint.trim().equalsIgnoreCase(osEndpoint.trim()); + } + + private static void setPhase(final Integer ordinal) { + Config.setProperty(IndexConfigHelper.MigrationPhase.FLAG_KEY, + ordinal == null ? null : String.valueOf(ordinal)); + } + + private synchronized void cleanupTestData() { + try { + if (osIndexAPI.indexExists(IDX)) { + osIndexAPI.delete(IDX); + } + } catch (final Exception e) { + Logger.warn(this, "Cleanup: error removing OS index '" + IDX + "': " + e.getMessage()); + } + // The dual-write create also lands an ES index; remove it directly on the ES cluster. + try { + final ESIndexAPI esIndex = ((IndexAPIImpl) APILocator.getESIndexAPI()).esImpl(); + if (esIndex.indexExists(IDX)) { + esIndex.delete(IDX); + } + } catch (final Exception e) { + Logger.warn(this, "Cleanup: error removing ES index '" + IDX + "': " + e.getMessage()); + } + try { + new DotConnect() + .setSQL("DELETE FROM indicies WHERE index_name LIKE ?") + .addParam("%" + SUFFIX + "%") + .loadResult(); + APILocator.getVersionedIndicesAPI().clearCache(); + } catch (final Exception e) { + Logger.warn(this, "Cleanup: error removing versioned DB rows: " + e.getMessage()); + } + } +} diff --git a/dotcms-integration/src/test/java/com/dotmarketing/sitesearch/viewtool/SiteSearchWebAPITest.java b/dotcms-integration/src/test/java/com/dotmarketing/sitesearch/viewtool/SiteSearchWebAPITest.java new file mode 100644 index 000000000000..215d94a58fa2 --- /dev/null +++ b/dotcms-integration/src/test/java/com/dotmarketing/sitesearch/viewtool/SiteSearchWebAPITest.java @@ -0,0 +1,494 @@ +package com.dotmarketing.sitesearch.viewtool; + +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertFalse; +import static 
org.junit.Assert.assertNotNull; +import static org.junit.Assert.assertNull; +import static org.junit.Assert.assertTrue; +import static org.mockito.Mockito.mock; +import static org.mockito.Mockito.when; + +import com.dotcms.IntegrationTestBase; +import com.dotcms.LicenseTestUtil; +import com.dotcms.content.index.domain.Aggregation; +import com.dotcms.content.index.domain.AggregationBucket; +import com.dotcms.content.index.domain.SearchHit; +import com.dotcms.enterprise.publishing.sitesearch.SiteSearchResult; +import com.dotcms.enterprise.publishing.sitesearch.SiteSearchResults; +import com.dotcms.util.IntegrationTestInitService; +import com.dotmarketing.business.APILocator; +import com.dotmarketing.sitesearch.business.SiteSearchAPI; +import com.dotmarketing.sitesearch.viewtool.SiteSearchWebAPI.Facet; +import com.dotmarketing.sitesearch.viewtool.SiteSearchWebAPI.InternalWrapperCountDateHistogramFacet; +import com.dotmarketing.sitesearch.viewtool.SiteSearchWebAPI.InternalWrapperStringTermsFacet; +import com.dotmarketing.util.Logger; +import java.util.List; +import java.util.Map; +import java.util.Set; +import javax.servlet.http.HttpServletRequest; +import javax.servlet.http.HttpServletResponse; +import org.apache.velocity.tools.view.context.ViewContext; +import org.junit.AfterClass; +import org.junit.BeforeClass; +import org.junit.Test; + +/** + * Integration test for the {@link SiteSearchWebAPI} Velocity view tool, modelled on + * {@code ContentSearchToolTest}. + * + *

Exercises the public view-tool surface end-to-end against a live search backend after the + * Elasticsearch → OpenSearch neutral-aggregation refactor (#35786), with emphasis on the fields of + * the POJOs returned by the refactored methods:

+ *
<ul> + *
<li>{@code search(...)} → {@link SiteSearchResults} / {@link SiteSearchResult} fields, the + * alias path, the default-index path, pagination and error states.</li> + *
<li>{@code getAggregations(...)} → the neutral {@link Aggregation} / {@link AggregationBucket} + * tree: name/type/buckets, doc counts, {@code getKeyAsNumber} (numeric histogram), and the + * nested {@code top_hits} {@link SearchHit}s.</li> + *
<li>{@code getFacets(...)} → all three legacy wrappers: string-terms, date/numeric-histogram and + * the plain {@link Facet} fallback, plus their entry POJOs.</li> + *
<li>{@code listSearchIndicies()} / {@code listSearchIncidies()}.</li> + *
</ul>
+ * + *

The tool resolves its backend through {@code APILocator.getSiteSearchAPI()} — now the + * {@code SiteSearchAPIImpl} phase router — so this also proves the router wiring did not break the + * legacy view-tool contract. Runs in the default integration profile (migration Phase 0 → + * Elasticsearch), like {@code ContentSearchToolTest}; no OpenSearch container is required.

+ * + * @author Fabrizio Araya + */ +public class SiteSearchWebAPITest extends IntegrationTestBase { + + private static final long SUFFIX = System.currentTimeMillis(); + private static final String IDX = "sitesearch_" + SUFFIX; + private static final String ALIAS = "ss_it_alias_" + SUFFIX; + + /** Unique token embedded in every indexed doc so the text query matches only this run's data. */ + private static final String TOKEN = "ssqa" + SUFFIX; + + private static final String MIME_HTML = "text/html"; + private static final String MIME_PDF = "application/pdf"; + private static final Set EXPECTED_MIMES = Set.of(MIME_HTML, MIME_PDF); + + /** 3 html docs + 2 pdf docs = 5 docs, all carrying TOKEN. */ + private static final int HTML_DOCS = 3; + private static final int PDF_DOCS = 2; + private static final int TOTAL_DOCS = HTML_DOCS + PDF_DOCS; + + // ---- Queries (JSON, so search() skips the request-host lookup) ----------------------------- + + private static final String SEARCH_TOKEN = + "{\"query\":{\"query_string\":{\"query\":\"" + "TOKEN_PLACEHOLDER" + + "\",\"default_field\":\"*\"}}}"; + + private static final String TERMS_AGG = + "{\"size\":0,\"aggs\":{\"by_mime\":{\"terms\":{\"field\":\"mimeType\",\"size\":10}}}}"; + + private static final String NESTED_AGG = + "{\"size\":0,\"aggs\":{\"by_mime\":{\"terms\":{\"field\":\"mimeType\",\"size\":10}," + + "\"aggs\":{\"top_docs\":{\"top_hits\":{\"size\":2}}}}}}"; + + private static final String HISTO_AGG = + "{\"size\":0,\"aggs\":{\"by_len\":{\"histogram\":{\"field\":\"contentLength\"," + + "\"interval\":25}}}}"; + + /** Query matches no doc, so the terms aggregation comes back with empty buckets. 
*/ + private static final String EMPTY_AGG = + "{\"size\":0,\"query\":{\"term\":{\"mimeType\":\"zzz/none\"}}," + + "\"aggs\":{\"empty\":{\"terms\":{\"field\":\"mimeType\",\"size\":10}}}}"; + + private static SiteSearchAPI siteSearchAPI; + + @BeforeClass + public static void prepare() throws Exception { + IntegrationTestInitService.getInstance().init(); + LicenseTestUtil.getLicense(); + + siteSearchAPI = APILocator.getSiteSearchAPI(); + + // Create the index WITH an alias (so the alias search path is exercised) and activate it as + // the default (so the default-index search path is exercised). + siteSearchAPI.createSiteSearchIndex(IDX, ALIAS, 1); + siteSearchAPI.activateIndex(IDX); + + for (int i = 0; i < TOTAL_DOCS; i++) { + final boolean html = i < HTML_DOCS; + final SiteSearchResult doc = new SiteSearchResult(); + doc.setId("ss-it-" + SUFFIX + "-" + i); + doc.setUrl("/site-search-webapi-it/" + i); + doc.setTitle("Site Search WebAPI IT doc " + i); + doc.setHost("demo.dotcms.com"); + doc.setAuthor("qa-author-" + i); + doc.setMimeType(html ? MIME_HTML : MIME_PDF); + // Vary the body length so the numeric histogram on contentLength spreads over buckets. + doc.setContent("dotcms site search viewtool integration " + TOKEN + + " ".repeat(i * 30)); + doc.setContentLength(doc.getContent().length()); + siteSearchAPI.putToIndex(IDX, doc, "content"); + } + } + + @AfterClass + public static void cleanup() { + try { + siteSearchAPI.deactivateIndex(IDX); + } catch (final Exception e) { + Logger.warn(SiteSearchWebAPITest.class, "Cleanup: deactivate failed: " + e.getMessage()); + } + try { + APILocator.getESIndexAPI() + .delete(APILocator.getESIndexAPI().getNameWithClusterIDPrefix(IDX)); + } catch (final Exception e) { + Logger.warn(SiteSearchWebAPITest.class, "Cleanup: delete failed: " + e.getMessage()); + } + } + + /** Builds a {@link SiteSearchWebAPI} initialized with a mock request/response. 
*/ + private SiteSearchWebAPI siteSearchWebAPI() { + final ViewContext viewContext = mock(ViewContext.class); + final HttpServletRequest request = mock(HttpServletRequest.class); + final HttpServletResponse response = mock(HttpServletResponse.class); + when(viewContext.getRequest()).thenReturn(request); + when(viewContext.getResponse()).thenReturn(response); + + final SiteSearchWebAPI tool = new SiteSearchWebAPI(); + tool.init(viewContext); + return tool; + } + + private static String searchToken() { + return SEARCH_TOKEN.replace("TOKEN_PLACEHOLDER", TOKEN); + } + + // ========================================================================= + // listSearchIndicies + // ========================================================================= + + /** + * Given scenario: a populated, active site-search index. + * Expected: listSearchIndicies() (and its legacy-typo alias) returns the created index. + */ + @Test + public void listSearchIndicies_containsCreatedIndex() { + final SiteSearchWebAPI tool = siteSearchWebAPI(); + + assertTrue("listSearchIndicies() must contain the created index", + tool.listSearchIndicies().contains(IDX)); + assertTrue("legacy-typo alias listSearchIncidies() must behave identically", + tool.listSearchIncidies().contains(IDX)); + + Logger.info(this, "✅ listSearchIndicies_containsCreatedIndex passed"); + } + + // ========================================================================= + // search — SiteSearchResults / SiteSearchResult field coverage + // ========================================================================= + + /** + * Given scenario: 5 docs carrying TOKEN in the default (active) index. + * Expected: the default-index search (3-arg) populates every SiteSearchResults field and each + * SiteSearchResult exposes id/url/title/mimeType/score. 
+ */ + @Test + public void search_defaultIndex_populatesResultFields() throws Exception { + final SiteSearchWebAPI tool = siteSearchWebAPI(); + + final SiteSearchResults results = tool.search(searchToken(), 0, 10); + + assertNull("Search must not return an error: " + results.getError(), results.getError()); + assertEquals("All TOKEN docs must be counted", TOTAL_DOCS, results.getTotalResults()); + assertEquals("getTotalHits() alias must match getTotalResults()", + results.getTotalResults(), results.getTotalHits()); + assertEquals("Result rows must match the total (under the page size)", + TOTAL_DOCS, results.getResults().size()); + assertTrue("maxScore must be positive for a matching query", results.getMaxScore() > 0); + assertEquals("offset must reflect the requested start", 0, results.getOffset()); + assertEquals("start alias must match offset", results.getOffset(), results.getStart()); + assertEquals("limit must reflect the requested rows", 10, results.getLimit()); + assertNotNull("query echo must be set", results.getQuery()); + assertNotNull("took must be set", results.getTook()); + + for (final SiteSearchResult hit : results.getResults()) { + assertNotNull("each hit must carry an id", hit.getId()); + assertTrue("each hit id must belong to this run", hit.getId().startsWith("ss-it-" + SUFFIX)); + assertNotNull("each hit must carry a url", hit.getUrl()); + assertNotNull("each hit must carry a title", hit.getTitle()); + assertTrue("each hit mimeType must be one of the indexed types", + EXPECTED_MIMES.contains(hit.getMimeType())); + assertTrue("each hit must have a positive score", hit.getScore() > 0); + } + + Logger.info(this, "✅ search_defaultIndex_populatesResultFields passed – hits: " + + results.getTotalResults()); + } + + /** + * Given scenario: the index was created with an alias. + * Expected: the 4-arg alias search resolves the alias to the backing index and returns the docs. 
+ */ + @Test + public void search_byAlias_resolvesIndex() { + final SiteSearchWebAPI tool = siteSearchWebAPI(); + + final SiteSearchResults results = tool.search(ALIAS, searchToken(), 0, 10); + + assertNull("Alias search must not return an error: " + results.getError(), + results.getError()); + assertEquals("Alias search must reach the same docs", TOTAL_DOCS, results.getTotalResults()); + + Logger.info(this, "✅ search_byAlias_resolvesIndex passed"); + } + + /** + * Given scenario: a JSON body that caps the page size to 2. + * Expected: the returned rows are capped to the page size while the total still reflects all + * matches — covering the offset/limit/totalResults fields together. + */ + @Test + public void search_pagination_capsReturnedRows() throws Exception { + final SiteSearchWebAPI tool = siteSearchWebAPI(); + + final String paged = "{\"size\":2,\"query\":{\"query_string\":{\"query\":\"" + TOKEN + + "\",\"default_field\":\"*\"}}}"; + final SiteSearchResults results = tool.search(paged, 0, 2); + + assertNull("Paged search must not error: " + results.getError(), results.getError()); + assertEquals("Total must still reflect every match", TOTAL_DOCS, results.getTotalResults()); + assertTrue("Returned rows must be capped by the page size", + results.getResults().size() <= 2); + + Logger.info(this, "✅ search_pagination_capsReturnedRows passed – returned: " + + results.getResults().size()); + } + + /** + * Given scenario: a query for a token that matches nothing. + * Expected: zero results, an empty result list and no error (a clean empty response). 
+ */ + @Test + public void search_noMatch_returnsEmptyWithoutError() throws Exception { + final SiteSearchWebAPI tool = siteSearchWebAPI(); + + final String noMatch = "{\"query\":{\"query_string\":{\"query\":\"zzznomatchzzz" + SUFFIX + + "\",\"default_field\":\"*\"}}}"; + final SiteSearchResults results = tool.search(noMatch, 0, 10); + + assertNull("No-match search must not error", results.getError()); + assertEquals("No-match search must count zero", 0, results.getTotalResults()); + assertTrue("No-match search must return no rows", results.getResults().isEmpty()); + + Logger.info(this, "✅ search_noMatch_returnsEmptyWithoutError passed"); + } + + /** + * Given scenario: a null query. + * Expected: the tool reports an error on the SiteSearchResults rather than throwing. + */ + @Test + public void search_nullQuery_setsError() throws Exception { + final SiteSearchWebAPI tool = siteSearchWebAPI(); + + final SiteSearchResults results = tool.search(null, 0, 10); + + assertNotNull("A null query must surface an error", results.getError()); + Logger.info(this, "✅ search_nullQuery_setsError passed – error: " + results.getError()); + } + + // ========================================================================= + // getAggregations — Aggregation / AggregationBucket field coverage + // ========================================================================= + + /** + * Given scenario: 3 html + 2 pdf docs. + * Expected: the terms aggregation on mimeType exposes a populated neutral Aggregation — name, + * type, two buckets with correct doc counts, string keys, null numeric keys (non-numeric) and no + * top-hits — covering the multi-bucket AggregationBucket accessors. 
+ */ + @Test + public void getAggregations_termsBuckets_fieldsPopulated() throws Exception { + final SiteSearchWebAPI tool = siteSearchWebAPI(); + + final Map aggregations = tool.getAggregations(IDX, TERMS_AGG); + + assertNotNull("Aggregations map must not be null", aggregations); + final Aggregation byMime = aggregations.get("by_mime"); + assertNotNull("'by_mime' aggregation must be present", byMime); + assertEquals("aggregation name must round-trip", "by_mime", byMime.getName()); + assertNotNull("aggregation type must be reported", byMime.getType()); + assertNull("a terms aggregation carries no top-hits", byMime.getHits()); + assertEquals("there must be one bucket per mimeType", 2, byMime.getBuckets().size()); + + long htmlCount = -1; + long pdfCount = -1; + for (final AggregationBucket bucket : byMime.getBuckets()) { + assertTrue("bucket key must be a known mimeType", + EXPECTED_MIMES.contains(bucket.getKey())); + assertEquals("getKeyAsString must mirror getKey", bucket.getKey(), + bucket.getKeyAsString()); + assertNull("a non-numeric key must yield a null number", bucket.getKeyAsNumber()); + assertTrue("each bucket must carry documents", bucket.getDocCount() > 0); + assertTrue("a terms bucket has no sub-aggregations here", + bucket.getAggregations().isEmpty()); + if (MIME_HTML.equals(bucket.getKey())) { + htmlCount = bucket.getDocCount(); + } else if (MIME_PDF.equals(bucket.getKey())) { + pdfCount = bucket.getDocCount(); + } + } + assertEquals("html bucket must count the html docs", HTML_DOCS, htmlCount); + assertEquals("pdf bucket must count the pdf docs", PDF_DOCS, pdfCount); + + Logger.info(this, "✅ getAggregations_termsBuckets_fieldsPopulated passed"); + } + + /** + * Given scenario: a terms aggregation with a nested top_hits sub-aggregation. 
+ * Expected: the neutral tree preserves the nested {@code top_docs} as an Aggregation that carries + * SearchHits, and each SearchHit exposes id and source — covering getHits()/SearchHit fields and + * the nested getAggregations() path. + */ + @Test + public void getAggregations_nestedTopHits_preserved() throws Exception { + final SiteSearchWebAPI tool = siteSearchWebAPI(); + + final Map aggregations = tool.getAggregations(IDX, NESTED_AGG); + final Aggregation byMime = aggregations.get("by_mime"); + assertNotNull("'by_mime' aggregation must be present", byMime); + assertFalse("'by_mime' must have buckets", byMime.getBuckets().isEmpty()); + + final AggregationBucket firstBucket = byMime.getBuckets().getFirst(); + final Aggregation topDocs = firstBucket.getAggregations().get("top_docs"); + assertNotNull("nested top_hits sub-aggregation must be preserved", topDocs); + assertNotNull("top_hits must carry a SearchHits container", topDocs.getHits()); + + final List hits = topDocs.getHits().getHits(); + assertFalse("top_hits must carry at least one hit", hits.isEmpty()); + final SearchHit hit = hits.getFirst(); + assertNotNull("each top-hit must expose an id", hit.getId()); + assertFalse("each top-hit must expose its source document", + hit.getSourceAsMap().isEmpty()); + + Logger.info(this, "✅ getAggregations_nestedTopHits_preserved passed – topHits: " + hits.size()); + } + + /** + * Given scenario: a numeric histogram on the long field {@code contentLength}. + * Expected: the buckets carry numeric keys, so {@link AggregationBucket#getKeyAsNumber()} returns + * a non-null Number — covering the numeric-key path (distinct from the non-numeric terms keys). 
+ */ + @Test + public void getAggregations_numericHistogram_keyAsNumber() throws Exception { + final SiteSearchWebAPI tool = siteSearchWebAPI(); + + final Map aggregations = tool.getAggregations(IDX, HISTO_AGG); + final Aggregation byLen = aggregations.get("by_len"); + assertNotNull("'by_len' histogram aggregation must be present", byLen); + assertTrue("histogram type must be reported as a histogram", + byLen.getType().contains("histogram")); + assertFalse("histogram must produce buckets", byLen.getBuckets().isEmpty()); + + boolean sawPopulatedNumericBucket = false; + for (final AggregationBucket bucket : byLen.getBuckets()) { + assertNotNull("a histogram bucket key must be numeric", bucket.getKeyAsNumber()); + if (bucket.getDocCount() > 0) { + sawPopulatedNumericBucket = true; + } + } + assertTrue("at least one histogram bucket must contain documents", sawPopulatedNumericBucket); + + Logger.info(this, "✅ getAggregations_numericHistogram_keyAsNumber passed"); + } + + // ========================================================================= + // getFacets — legacy wrapper coverage (terms / histogram / plain) + // ========================================================================= + + /** + * Given scenario: a terms aggregation with non-empty buckets. + * Expected: getFacets wraps it as an {@link InternalWrapperStringTermsFacet} exposing name/type + * and term entries with term + count — covering the legacy string-terms facet POJO. 
+ */ + @Test + public void getFacets_termsAggregation_wrapsAsStringTermsFacet() throws Exception { + final SiteSearchWebAPI tool = siteSearchWebAPI(); + + final Map facets = tool.getFacets(IDX, TERMS_AGG); + assertNotNull("Facets map must not be null", facets); + + final Facet facet = facets.get("by_mime"); + assertNotNull("'by_mime' facet must be present", facet); + assertEquals("facet name must round-trip", "by_mime", facet.getName()); + assertNotNull("facet type must be reported", facet.getType()); + assertTrue("non-empty terms aggregation must map to InternalWrapperStringTermsFacet", + facet instanceof InternalWrapperStringTermsFacet); + + final InternalWrapperStringTermsFacet termsFacet = (InternalWrapperStringTermsFacet) facet; + assertEquals("there must be one entry per bucket", 2, termsFacet.entries().size()); + + long htmlCount = -1; + for (final var entry : termsFacet.entries()) { + assertTrue("entry term must be a known mimeType", EXPECTED_MIMES.contains(entry.getTerm())); + assertTrue("entry count must be positive", entry.getCount() > 0); + if (MIME_HTML.equals(entry.getTerm())) { + htmlCount = entry.getCount(); + } + } + assertEquals("html term entry must count the html docs", HTML_DOCS, htmlCount); + + Logger.info(this, "✅ getFacets_termsAggregation_wrapsAsStringTermsFacet passed"); + } + + /** + * Given scenario: a numeric histogram aggregation. + * Expected: getFacets wraps it as an {@link InternalWrapperCountDateHistogramFacet} exposing + * CountEntry rows with time (the numeric key) and count — covering the legacy histogram facet + * POJO and the {@code isHistogram} branch. 
+ */ + @Test + public void getFacets_histogramAggregation_wrapsAsCountHistogramFacet() throws Exception { + final SiteSearchWebAPI tool = siteSearchWebAPI(); + + final Map facets = tool.getFacets(IDX, HISTO_AGG); + final Facet facet = facets.get("by_len"); + assertNotNull("'by_len' facet must be present", facet); + assertTrue("a histogram aggregation must map to InternalWrapperCountDateHistogramFacet", + facet instanceof InternalWrapperCountDateHistogramFacet); + + final InternalWrapperCountDateHistogramFacet histoFacet = + (InternalWrapperCountDateHistogramFacet) facet; + assertFalse("histogram facet must expose count entries", histoFacet.entries().isEmpty()); + + boolean sawPopulatedEntry = false; + for (final var entry : histoFacet.entries()) { + assertTrue("entry time (numeric key) must be non-negative", entry.getTime() >= 0); + if (entry.getCount() > 0) { + sawPopulatedEntry = true; + } + } + assertTrue("at least one histogram entry must carry a count", sawPopulatedEntry); + + Logger.info(this, "✅ getFacets_histogramAggregation_wrapsAsCountHistogramFacet passed"); + } + + /** + * Given scenario: a terms aggregation whose query matches no document (empty buckets). + * Expected: getFacets falls back to a plain {@link Facet} (neither wrapper), still exposing + * name and type — covering the empty-bucket branch. 
+ */ + @Test + public void getFacets_emptyBuckets_fallsBackToPlainFacet() throws Exception { + final SiteSearchWebAPI tool = siteSearchWebAPI(); + + final Map facets = tool.getFacets(IDX, EMPTY_AGG); + final Facet facet = facets.get("empty"); + assertNotNull("'empty' facet must be present", facet); + assertEquals("facet name must round-trip", "empty", facet.getName()); + assertNotNull("facet type must be reported", facet.getType()); + assertFalse("an empty terms aggregation must NOT be a string-terms wrapper", + facet instanceof InternalWrapperStringTermsFacet); + assertFalse("an empty terms aggregation must NOT be a histogram wrapper", + facet instanceof InternalWrapperCountDateHistogramFacet); + + Logger.info(this, "✅ getFacets_emptyBuckets_fallsBackToPlainFacet passed"); + } +}