diff --git a/_includes/code/graphql.aggregate.groupby.mdx b/_includes/code/graphql.aggregate.groupby.mdx index 46a9203d..bd09b0b4 100644 --- a/_includes/code/graphql.aggregate.groupby.mdx +++ b/_includes/code/graphql.aggregate.groupby.mdx @@ -3,6 +3,9 @@ import TabItem from '@theme/TabItem'; import FilteredTextBlock from '@site/src/components/Documentation/FilteredTextBlock'; import PyCode from '!!raw-loader!/_includes/code/graphql.aggregate.simple.py'; +import TSCode from '!!raw-loader!/_includes/code/howto/search.aggregate.ts'; +import JavaV6Code from '!!raw-loader!/_includes/code/java-v6/src/test/java/SearchAggregateTest.java'; +import CSharpCode from '!!raw-loader!/_includes/code/csharp/SearchAggregateTest.cs'; @@ -13,6 +16,30 @@ import PyCode from '!!raw-loader!/_includes/code/graphql.aggregate.simple.py'; language="py" /> + + + + + + + + + ```go diff --git a/_includes/code/graphql.aggregate.nearText.mdx b/_includes/code/graphql.aggregate.nearText.mdx index e9e29118..9c542d99 100644 --- a/_includes/code/graphql.aggregate.nearText.mdx +++ b/_includes/code/graphql.aggregate.nearText.mdx @@ -3,6 +3,9 @@ import TabItem from '@theme/TabItem'; import FilteredTextBlock from '@site/src/components/Documentation/FilteredTextBlock'; import PyCode from '!!raw-loader!/_includes/code/graphql.aggregate.simple.py'; +import TSCode from '!!raw-loader!/_includes/code/howto/search.aggregate.ts'; +import JavaV6Code from '!!raw-loader!/_includes/code/java-v6/src/test/java/SearchAggregateTest.java'; +import CSharpCode from '!!raw-loader!/_includes/code/csharp/SearchAggregateTest.cs'; @@ -13,6 +16,30 @@ import PyCode from '!!raw-loader!/_includes/code/graphql.aggregate.simple.py'; language="py" /> + + + + + + + + + ```go diff --git a/_includes/code/graphql.aggregate.simple.mdx b/_includes/code/graphql.aggregate.simple.mdx index 7c72e35c..63172d1d 100644 --- a/_includes/code/graphql.aggregate.simple.mdx +++ b/_includes/code/graphql.aggregate.simple.mdx @@ -3,6 +3,9 @@ import TabItem 
from '@theme/TabItem'; import FilteredTextBlock from '@site/src/components/Documentation/FilteredTextBlock'; import PyCode from '!!raw-loader!/_includes/code/graphql.aggregate.simple.py'; +import TSCode from '!!raw-loader!/_includes/code/howto/search.aggregate.ts'; +import JavaV6Code from '!!raw-loader!/_includes/code/java-v6/src/test/java/SearchAggregateTest.java'; +import CSharpCode from '!!raw-loader!/_includes/code/csharp/SearchAggregateTest.cs'; @@ -13,6 +16,30 @@ import PyCode from '!!raw-loader!/_includes/code/graphql.aggregate.simple.py'; language="py" /> + + + + + + + + + ```go diff --git a/_includes/code/graphql.filters.limit.mdx b/_includes/code/graphql.filters.limit.mdx index 8460741f..3d2aba64 100644 --- a/_includes/code/graphql.filters.limit.mdx +++ b/_includes/code/graphql.filters.limit.mdx @@ -2,6 +2,9 @@ import Tabs from '@theme/Tabs'; import TabItem from '@theme/TabItem'; import FilteredTextBlock from '@site/src/components/Documentation/FilteredTextBlock'; import PyCode from '!!raw-loader!/_includes/code/graphql.additional.py'; +import TSCode from '!!raw-loader!/_includes/code/howto/search.basics.ts'; +import JavaV6Code from '!!raw-loader!/_includes/code/java-v6/src/test/java/SearchBasicTest.java'; +import CSharpCode from '!!raw-loader!/_includes/code/csharp/SearchBasicTest.cs'; @@ -12,6 +15,30 @@ import PyCode from '!!raw-loader!/_includes/code/graphql.additional.py'; language="py" /> + + + + + + + + + ```go diff --git a/_includes/code/graphql.filters.offset.mdx b/_includes/code/graphql.filters.offset.mdx index a221e289..34e7ff80 100644 --- a/_includes/code/graphql.filters.offset.mdx +++ b/_includes/code/graphql.filters.offset.mdx @@ -2,6 +2,9 @@ import Tabs from '@theme/Tabs'; import TabItem from '@theme/TabItem'; import FilteredTextBlock from '@site/src/components/Documentation/FilteredTextBlock'; import PyCode from '!!raw-loader!/_includes/code/graphql.additional.py'; +import TSCode from '!!raw-loader!/_includes/code/howto/search.basics.ts'; 
+import JavaV6Code from '!!raw-loader!/_includes/code/java-v6/src/test/java/SearchBasicTest.java'; +import CSharpCode from '!!raw-loader!/_includes/code/csharp/SearchBasicTest.cs'; @@ -12,6 +15,30 @@ import PyCode from '!!raw-loader!/_includes/code/graphql.additional.py'; language="py" /> + + + + + + + + + ```go diff --git a/_includes/code/graphql.get.beacon.mdx b/_includes/code/graphql.get.beacon.mdx index be013c35..a9d6717f 100644 --- a/_includes/code/graphql.get.beacon.mdx +++ b/_includes/code/graphql.get.beacon.mdx @@ -4,6 +4,9 @@ import FilteredTextBlock from '@site/src/components/Documentation/FilteredTextBl import PyCode from '!!raw-loader!/_includes/code/graphql.get.simple.py'; import PyCodeV3 from '!!raw-loader!/_includes/code/graphql.get.beacon.v3.py'; +import TSCode from '!!raw-loader!/_includes/code/howto/search.basics.ts'; +import JavaV6Code from '!!raw-loader!/_includes/code/java-v6/src/test/java/SearchBasicTest.java'; +import CSharpCode from '!!raw-loader!/_includes/code/csharp/SearchBasicTest.cs'; @@ -15,6 +18,30 @@ import PyCodeV3 from '!!raw-loader!/_includes/code/graphql.get.beacon.v3.py'; language="py" /> + + + + + + + + + diff --git a/_includes/code/graphql.get.simple.mdx b/_includes/code/graphql.get.simple.mdx index 93efc8e5..70b79679 100644 --- a/_includes/code/graphql.get.simple.mdx +++ b/_includes/code/graphql.get.simple.mdx @@ -4,6 +4,9 @@ import FilteredTextBlock from '@site/src/components/Documentation/FilteredTextBl import PyCode from '!!raw-loader!/_includes/code/graphql.get.simple.py'; import PyCodeV3 from '!!raw-loader!/_includes/code/graphql.get.simple.v3.py'; +import TSCode from '!!raw-loader!/_includes/code/howto/search.basics.ts'; +import JavaV6Code from '!!raw-loader!/_includes/code/java-v6/src/test/java/SearchBasicTest.java'; +import CSharpCode from '!!raw-loader!/_includes/code/csharp/SearchBasicTest.cs'; @@ -14,6 +17,30 @@ import PyCodeV3 from '!!raw-loader!/_includes/code/graphql.get.simple.v3.py'; language="py" /> + + + + + 
+ + + + ```go diff --git a/_includes/code/graphql.underscoreproperties.distance.mdx b/_includes/code/graphql.underscoreproperties.distance.mdx index 88f510d8..84dd473d 100644 --- a/_includes/code/graphql.underscoreproperties.distance.mdx +++ b/_includes/code/graphql.underscoreproperties.distance.mdx @@ -3,6 +3,9 @@ import TabItem from '@theme/TabItem'; import FilteredTextBlock from '@site/src/components/Documentation/FilteredTextBlock'; import PyCode from '!!raw-loader!/_includes/code/graphql.metadata.py'; +import TSCode from '!!raw-loader!/_includes/code/howto/search.basics.ts'; +import JavaV6Code from '!!raw-loader!/_includes/code/java-v6/src/test/java/SearchBasicTest.java'; +import CSharpCode from '!!raw-loader!/_includes/code/csharp/SearchBasicTest.cs'; @@ -13,6 +16,30 @@ import PyCode from '!!raw-loader!/_includes/code/graphql.metadata.py'; language="py" /> + + + + + + + + + ```go diff --git a/docs/weaviate/api/graphql/additional-operators.md b/docs/weaviate/api/graphql/additional-operators.md index 7aa0b8bd..3b6c620d 100644 --- a/docs/weaviate/api/graphql/additional-operators.md +++ b/docs/weaviate/api/graphql/additional-operators.md @@ -5,76 +5,21 @@ description: "Syntax reference for additional operators that extend query functi image: og/docs/api.jpg # tags: ['graphql', 'additional operators'] --- -import Tabs from '@theme/Tabs'; -import TabItem from '@theme/TabItem'; -import TryEduDemo from '/_includes/try-on-edu-demo.mdx'; -import FilteredTextBlock from '@site/src/components/Documentation/FilteredTextBlock'; -import AutocutPyCode from '!!raw-loader!/_includes/code/howto/search.similarity.py'; -import AutocutPyCodeV3 from '!!raw-loader!/_includes/code/howto/search.similarity-v3.py'; -import AutocutTSCode from '!!raw-loader!/_includes/code/howto/search.similarity.ts'; -import PyCode from '!!raw-loader!/_includes/code/graphql.additional.py'; -import PyCodeV3 from '!!raw-loader!/_includes/code/graphql.additional-v3.py'; -import TSCode from 
'!!raw-loader!/_includes/code/graphql.additional.ts'; -import GoCode from '!!raw-loader!/_includes/code/graphql.additional.go'; -import JavaCode from '!!raw-loader!/_includes/code/graphql.additional.java'; -import CurlCode from '!!raw-loader!/_includes/code/graphql.additional.sh'; - - - - -## Syntax Functions such as `limit`, `autocut`, and `sort` modify queries at the class level. - +:::tip How-to guide +For sorting, pagination, and cursor usage examples with multi-language code snippets, see [Sort and paginate](../../search/sort-and-paginate.md). +::: ## Limit argument -The `limit` argument restricts the number of results. These functions support `limit`: - -- `Get` -- `Explore` -- `Aggregate` +The `limit` argument restricts the number of results. Supported by `Get`, `Explore`, and `Aggregate`. import GraphQLFiltersLimit from '/_includes/code/graphql.filters.limit.mdx'; -
- Expected response - -```json -{ - "data": { - "Get": { - "Article": [ - { - "title": "Backs on the rack - Vast sums are wasted on treatments for back pain that make it worse" - }, - { - "title": "Graham calls for swift end to impeachment trial, warns Dems against calling witnesses" - }, - { - "title": "Through a cloud, brightly - Obituary: Paul Volcker died on December 8th" - }, - { - "title": "Google Stadia Reviewed \u2013 Against The Stream" - }, - { - "title": "Managing Supply Chain Risk" - } - ] - } - } -} -``` - -
## Pagination with `offset` @@ -88,37 +33,6 @@ import GraphQLFiltersOffset from '/_includes/code/graphql.filters.offset.mdx'; -
- Expected response - -```json -{ - "data": { - "Get": { - "Article": [ - { - "title": "Through a cloud, brightly - Obituary: Paul Volcker died on December 8th" - }, - { - "title": "Google Stadia Reviewed \u2013 Against The Stream" - }, - { - "title": "Managing Supply Chain Risk" - }, - { - "title": "Playing College Football In Madden" - }, - { - "title": "The 50 best albums of 2019, No 3: Billie Eilish \u2013 When We All Fall Asleep, Where Do We Go?" - } - ] - } - } -} -``` - -
- ### Performance considerations Pagination is not a cursor-based implementation. This has the following implications: @@ -126,92 +40,38 @@ Pagination is not a cursor-based implementation. This has the following implicat - **Response time and system load increase as the number of pages grows**. As the offset grows, each additional page request requires a new, larger call against your collection. For example, if your `offset` and `limit` specify results from 21-30, Weaviate retrieves 30 objects and drops the first 20. On the next call, Weaviate retrieves 40 objects and drops the first 30. - **Resource requirements are amplified in multi-shard configurations.** Each shard retrieves a full list of objects. Each shard also drops the objects before the offset. If you have 10 shards configured and ask for results 91-100, Weaviate retrieves 1000 objects (100 per shard) and drops 990 of them. - **The number of objects you can retrieve is limited**. A single query returns up to `QUERY_MAXIMUM_RESULTS`. If the sum of `offset` and `limit` exceeds `QUERY_MAXIMUM_RESULTS`, Weaviate returns an error. To change the limit, edit the `QUERY_MAXIMUM_RESULTS` environment variable. If you increase `QUERY_MAXIMUM_RESULTS`, use the lowest value possible to avoid performance problems. - - **Pagination is not stateful**. If the database state changes between calls, your pages might miss results. An insertion or a deletion will change the object count. An update could change object order. However, if there are no writes the overall results set is the same if you retrieve a large single page or many smaller ones. +- **Pagination is not stateful**. If the database state changes between calls, your pages might miss results. An insertion or a deletion will change the object count. An update could change object order. However, if there are no writes the overall results set is the same if you retrieve a large single page or many smaller ones. 
+For large-scale sequential retrieval, use the [cursor API](#cursor-with-after). -## Autocut - -The autocut function limits results based on discontinuities in the result set. Specifically, autocut looks for discontinuities, or jumps, in result metrics such as vector distance or search score. -To use autocut, specify how many jumps there should be in your query. The query stops returning results after the specified number of jumps. +## Autocut -For example, consider a `nearText` search that returns objects with these distance values: +The autocut function limits results based on discontinuities (jumps) in result metrics such as vector distance or search score. - `[0.1899, 0.1901, 0.191, 0.21, 0.215, 0.23]`. +Specify how many jumps to allow. The query stops returning results after that many jumps. -Autocut returns the following: +For example, with distances `[0.1899, 0.1901, 0.191, 0.21, 0.215, 0.23]`: - `autocut: 1`: `[0.1899, 0.1901, 0.191]` - `autocut: 2`: `[0.1899, 0.1901, 0.191, 0.21, 0.215]` - `autocut: 3`: `[0.1899, 0.1901, 0.191, 0.21, 0.215, 0.23]` -Autocut works with these functions: - -- `nearXXX` -- `bm25` -- `hybrid` - -To use autocut with the `hybrid` search, specify the `relativeScoreFusion` ranking method. - -Autocut is disabled by default. To explicitly disable autocut, set the number of jumps to `0` or a negative value. +Works with `nearXXX`, `bm25`, and `hybrid` (requires `relativeScoreFusion` for hybrid). Disabled by default; set to `0` or a negative value to explicitly disable. If autocut is combined with the limit filter, autocut only considers the first objects returned up to the value of `limit`. - - -Sample client code: - - - - - - - - - - - - - -
- Example response - -The output is like this: - - - -
- -For more client code examples for each functional category, see these pages: - -- [Autocut with similarity search](../../search/similarity.md#limit-result-groups). -- [Autocut with `bm25` search](../../search/bm25.md#limit-result-groups). -- [Autocut with `hybrid` search](../../search/hybrid.md#limit-result-groups). +For client code examples: +- [Autocut with similarity search](../../search/similarity.md#limit-result-groups) +- [Autocut with BM25 search](../../search/bm25.md#limit-result-groups) +- [Autocut with hybrid search](../../search/hybrid.md#limit-result-groups) ## Cursor with `after` -Starting with version `v1.18`, you can use `after` to retrieve objects sequentially. For example, you can use `after` to retrieve a complete set of objects from a collection. +The `after` operator retrieves objects sequentially using a cursor based on object IDs. Compatible with single-shard and multi-shard configurations. -`after` creates a cursor that is compatible with single shard and multi-shard configurations. - -The `after` function relies on object ids, and thus it only works with list queries. `after` is not compatible with `where`, `near`, `bm25`, `hybrid`, or similar searches, or in combination with filters. For those use cases, use pagination with `offset` and `limit`. +`after` only works with list queries. It is **not** compatible with `where`, `near`, `bm25`, `hybrid`, or similar search operators. For those, use [pagination with `offset`](#pagination-with-offset). import GraphQLFiltersAfter from '/_includes/code/graphql.filters.after.mdx'; @@ -265,7 +125,7 @@ import GraphQLFiltersAfter from '/_includes/code/graphql.filters.after.mdx'; ## Sorting -You can sort results by any primitive property, such as `text`, `number`, or `int`. +Sort results by any primitive property (such as `text`, `number`, or `int`). Sorting is **unavailable when using search operators** (which rank by relevance). 
### Sorting considerations @@ -277,13 +137,16 @@ Weaviate does not use any sorting-specific data structures on disk. When objects ### Sort order -#### boolean values +#### Boolean values + `false` is considered smaller than `true`. `false` comes before `true` in ascending order and after `true` in descending order. -#### null values +#### Null values + `null` values are considered smaller than any non-`null` values. `null` values come first in ascending order and last in descending order. -#### arrays +#### Arrays + Arrays are compared by each element separately. Elements at the same position are compared to each other, starting from the beginning of an array. When Weaviate finds an array element in one array that is smaller than its counterpart in the second array, Weaviate considers the whole first array to be smaller than the second one. Arrays are equal if they have the same length and all elements are equal. If one array is subset of another array it is considered smaller. @@ -296,233 +159,40 @@ Examples: ### Sorting API -Sorting can be performed by one or more properties. If the values for the first property are identical, Weaviate uses the second property to determine the order, and so on. The sort function takes either an object, or an array of objects, that describe a property and a sort order. | Parameter | Required | Type | Description | |-----------|----------|-----------------|-----------------------------------------------------------| -| `path` | yes | `text` | The path to the sort field is an single element array that contains the field name. GraphQL supports specifying the field name directly. | -| `order` | varies by client | `asc` or `desc` | The sort order, ascending (default) or descending.| - - - - - - - - - - - - - - - - - - - - - - - - - - +| `path` | yes | `text` | Single-element array containing the field name. GraphQL supports specifying the field name directly. 
| +| `order` | varies by client | `asc` or `desc` | Sort order, ascending (default) or descending. | -
- Expected response - -```json -{ - "data": { - "Get": { - "JeopardyQuestion": [ - { - "answer": "$5 (Lincoln Memorial in the background)", - "points": 600, - "question": "A sculpture by Daniel Chester French can be seen if you look carefully on the back of this current U.S. bill" - }, - { - "answer": "(1 of 2) Juneau, Alaska or Augusta, Maine", - "points": 0, - "question": "1 of the 2 U.S. state capitals that begin with the names of months" - }, - { - "answer": "(1 of 2) Juneau, Alaska or Honolulu, Hawaii", - "points": 0, - "question": "One of the 2 state capitals whose names end with the letter \"U\"" - } - ] - } - } -} -``` +Sorting can be performed by one or more properties. If the values for the first property are identical, Weaviate uses the second property to determine the order, and so on. To sort by more than one property, pass an array of `{ path, order }` objects to the sort function. -
- -#### Sorting by multiple properties - -To sort by more than one property, pass an array of { `path`, `order` } objects to the sort function: - - - - - - - - - - - - - - - - - - - - - - - - - - -#### Metadata properties +### Metadata properties To sort with metadata, add an underscore to the property name. -| Property Name | Sort Property Name | +| Property Name | Sort Property Name | | :- | :- | | `id` | `_id` | | `creationTimeUnix` | `_creationTimeUnix` | | `lastUpdateTimeUnix` | `_lastUpdateTimeUnix` | - - - - - - - - - - - - - - - - - - - - - - - - -
- Python client v4 property names - -| Property Name | Sort Property Name | -| :- | :- | -| `uuid` |`_id` | -| `creation_time` | `_creationTimeUnix` | -| `last_update_time` | `_lastUpdateTimeUnix` | - -
+For sorting code examples, see [Sort and paginate: Sorting](../../search/sort-and-paginate.md#sorting). ## Grouping -You can use a group to combine similar concepts (also known as _entity merging_). There are two ways of grouping semantically similar objects together, `closest` and `merge`. To return the closest concept, set `type: closest`. To combine similar entities into a single string, set `type: merge` +You can use a group to combine similar concepts (also known as _entity merging_). There are two ways of grouping semantically similar objects together, `closest` and `merge`. To return the closest concept, set `type: closest`. To combine similar entities into a single string, set `type: merge`. + +When using `merge`, the central concept in the group leads the group. Related values follow in parentheses. ### Variables | Variable | Required | Type | Description | -| --------- | -------- | ---- | ----------- | -| `type` | yes | `string` | Either `closest` or `merge` | -| `force` | yes | `float` | The force to apply for a particular movements.
Must be between `0` and `1`. `0` is no movement. `1` is maximum movement. | - -### Example +| --- | --- | --- | --- | +| `type` | yes | `string` | Either `closest` or `merge`. | +| `force` | yes | `float` | The force to apply for a particular movement. Must be between `0` and `1`. `0` is no movement. `1` is maximum movement. | import GraphQLFiltersGroup from '/_includes/code/graphql.filters.group.mdx'; diff --git a/docs/weaviate/api/graphql/additional-properties.md b/docs/weaviate/api/graphql/additional-properties.md index 77756a5b..a07888da 100644 --- a/docs/weaviate/api/graphql/additional-properties.md +++ b/docs/weaviate/api/graphql/additional-properties.md @@ -6,12 +6,13 @@ image: og/docs/api.jpg --- import SkipLink from '/src/components/SkipValidationLink' -import TryEduDemo from '/_includes/try-on-edu-demo.mdx'; - - Various 'additional properties', also called 'metadata', can be retrieved in queries. +:::tip How-to guide +For usage examples with multi-language code snippets, see [Query basics: Retrieve metadata values](../../search/basics.md#retrieve-metadata-values). +::: + ### Available additional properties The fields `id`, `vector`, `certainty`, `distance`, `featureProjection` and `classification` are available by default. @@ -30,7 +31,7 @@ Each of the client libraries may handle this differently. See the examples below An example query getting the [UUID](#id) and the [distance](#distance). -import GraphQLUnderscoreDistance from '/_includes/code/graphql.underscoreproperties.distance.mdx'; +import GraphQLUnderscoreDistance from '/_includes/code/graphql.underscoreproperties.distance.mdx'; @@ -86,7 +87,6 @@ A `generate` query will cause corresponding additional result fields to be avail For examples, see the [related how-to page](../../search/generative.md). - ### rerank :::info Requires a [reranker integration](../../model-providers/index.md) @@ -94,16 +94,15 @@ For examples, see the [related how-to page](../../search/generative.md). 
The `rerank` field can be used to [reorder the search results](../../search/rerank.md). It accepts two parameters: -| Parameter | Required | Type | Description | -|--------------|----------|------------|--------------| -| `property` | yes | `string` | Which property to pass to the reranker. For example, you may want to run a similarity search on a Products collection, then rerank specifically on the Name field. | -| `query` | no | `string` | Optionally specify a different query. | +| Parameter | Required | Type | Description | +| ---------- | -------- | -------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------ | +| `property` | yes | `string` | Which property to pass to the reranker. For example, you may want to run a similarity search on a Products collection, then rerank specifically on the Name field. | +| `query` | no | `string` | Optionally specify a different query. | A `rerank` query will cause corresponding additional `score` field to be available. For examples, see the [related how-to page](../../search/rerank.md). - ### creationTimeUnix Use the `creationTimeUnix` field to fetch the data object creation timestamp. @@ -140,25 +139,23 @@ The `score` will be the BM25F score of the result. Note that this score is relat The `explainScore` will explain the BM25F score of the result, broken down into its components. This can be used to understand why a result was scored the way it was. - ### Hybrid search metadata Use the `score` and `explainScore` field to fetch the scores and explanations of each result of a hybrid search. #### Score -The `score` will be the hybrid score of the result, based on the nominated [fusion algorithm](./search-operators.md#fusion-algorithms). Note that this score is relative to the dataset and query. 
+The `score` will be the hybrid score of the result, based on the nominated [fusion algorithm](../../search/hybrid.md#fusion-algorithms). Note that this score is relative to the dataset and query. #### ExplainScore The `explainScore` will be the hybrid score of the result, broken down into its vector and keyword search components. This can be used to understand why a result was scored the way it was. - ### Classification When a data-object has been subjected to classification, you can get additional information about how the object was classified by running the following command: -import GraphQLUnderscoreClassification from '/_includes/code/graphql.underscoreproperties.classification.mdx'; +import GraphQLUnderscoreClassification from '/_includes/code/graphql.underscoreproperties.classification.mdx'; @@ -168,17 +165,17 @@ Use feature projection to reduce the results' vectors to 2d or 3d for easy visua To tweak the feature projection optional parameters (currently GraphQL-only) can be provided. The values and their defaults are: -| Parameter | Type | Default | Implication | -|--|--|--|--| -| `dimensions` | `int` | `2` | Target dimensionality, usually `2` or `3` | -| `algorithm` | `string` | `tsne` | Algorithm to be used, currently supported: `tsne` | -| `perplexity` | `int` | `min(5, len(results)-1)` | The `t-SNE` perplexity value, must be smaller than the `n-1` where `n` is the number of results to be visualized | -| `learningRate` | `int` | `25` | The `t-SNE` learning rate | -| `iterations` | `int` | `100` | The number of iterations the `t-SNE` algorithm runs. 
Higher values lead to more stable results at the cost of a larger response time | +| Parameter | Type | Default | Implication | +| -------------- | -------- | ------------------------ | ------------------------------------------------------------------------------------------------------------------------------------ | +| `dimensions` | `int` | `2` | Target dimensionality, usually `2` or `3` | +| `algorithm` | `string` | `tsne` | Algorithm to be used, currently supported: `tsne` | +| `perplexity` | `int` | `min(5, len(results)-1)` | The `t-SNE` perplexity value, must be smaller than the `n-1` where `n` is the number of results to be visualized | +| `learningRate` | `int` | `25` | The `t-SNE` learning rate | +| `iterations` | `int` | `100` | The number of iterations the `t-SNE` algorithm runs. Higher values lead to more stable results at the cost of a larger response time | An example with default settings: -import GraphQLUnderscoreFeature from '/_includes/code/graphql.underscoreproperties.featureprojection.mdx'; +import GraphQLUnderscoreFeature from '/\_includes/code/graphql.underscoreproperties.featureprojection.mdx'; @@ -225,14 +222,14 @@ The above result can be plotted as follows (where the result in red is the first ![Weaviate T-SNE example](./img/plot-noSettings.png?i=1 "Weaviate T-SNE example") -#### best practices and notes -* Due to the O(n^2) complexity of the `t-SNE` algorithm, we recommend to keep the request size at or below 100 items. -* `t-SNE` is non-deterministic and lossy, and happens in real-time per query. The dimensions returned have no meaning across queries. -* Due to the relatively high cost of the underlying algorithm, we recommend to limit requests including a `featureProjection` in high-load situations where response time matters. Avoid parallel requests including a `featureProjection`, so that some threads stay available to serve other, time-critical requests. 
+#### Best practices and notes +- Due to the O(n^2) complexity of the `t-SNE` algorithm, we recommend keeping the request size at or below 100 items. +- `t-SNE` is non-deterministic and lossy, and happens in real-time per query. The dimensions returned have no meaning across queries. +- Due to the relatively high cost of the underlying algorithm, we recommend limiting requests including a `featureProjection` in high-load situations where response time matters. Avoid parallel requests including a `featureProjection`, so that some threads stay available to serve other, time-critical requests. ## Questions and feedback -import DocsFeedback from '/_includes/docs-feedback.mdx'; +import DocsFeedback from '/_includes/docs-feedback.mdx'; diff --git a/docs/weaviate/api/graphql/aggregate.md b/docs/weaviate/api/graphql/aggregate.md index 21e47705..87eef934 100644 --- a/docs/weaviate/api/graphql/aggregate.md +++ b/docs/weaviate/api/graphql/aggregate.md @@ -8,16 +8,14 @@ image: og/docs/api.jpg import FilteredTextBlock from '@site/src/components/Documentation/FilteredTextBlock'; -import TryEduDemo from '/_includes/try-on-edu-demo.mdx'; - - - -# Overview - This page covers aggregation queries. They are collectively referred to as `Aggregate` queries within. An `Aggregate` query can aggregate over an entire collection, or the [results of a search](#aggregating-a-vector-search--faceted-vector-search). +:::tip How-to guide +For usage examples with multi-language code snippets, see [Aggregation](../../search/aggregate.md). +::: + ### Parameters diff --git a/docs/weaviate/api/graphql/filters.md b/docs/weaviate/api/graphql/filters.md index 85533951..0a4296cf 100644 --- a/docs/weaviate/api/graphql/filters.md +++ b/docs/weaviate/api/graphql/filters.md @@ -1,88 +1,59 @@ --- title: Conditional filters sidebar_position: 35 -description: "GraphQL filtering documentation for applying conditional logic to refine search results." 
+description: "GraphQL filtering reference: operator list, filter structure, path syntax, and value types." image: og/docs/api.jpg # tags: ['graphql', 'filters'] --- +Conditional filters may be added to [`Object-level`](./get.md) and [`Aggregate`](./aggregate.md) queries, as well as [batch deletion](../../manage-objects/delete.mdx#delete-multiple-objects). The operator used for filtering is called a `where` filter. -import TryEduDemo from '/_includes/try-on-edu-demo.mdx'; +:::tip How-to guide +For usage examples with multi-language code snippets, see [Filters](../../search/filters.md). +::: - +## Filter structure -Conditional filters may be added to queries such as [`Object-level`](./get.md) and [`Aggregate`](./aggregate.md) queries, as well as [batch deletion](../../manage-objects/delete.mdx#delete-multiple-objects). The operator used for filtering is also called a `where` filter. +The `where` filter is an [algebraic object](https://en.wikipedia.org/wiki/Algebraic_structure) with the following arguments: -A filter may consist of one or more conditions, which are combined using the `And` or `Or` operators. Each condition consists of a property path, an operator, and a value. +### Operators +| Operator | Description | +| --- | --- | +| `And` | All operands must match. | +| `Or` | At least one operand must match. | +| `Not` | Negate a condition. | +| `Equal` | Exact match. | +| `NotEqual` | Inverse of `Equal`. | +| `GreaterThan` | Greater than comparison. | +| `GreaterThanEqual` | Greater than or equal. | +| `LessThan` | Less than comparison. | +| `LessThanEqual` | Less than or equal. | +| `Like` | Partial text match with `?` (one char) and `*` (zero+ chars) wildcards. See [details](#like). | +| `WithinGeoRange` | Geo-coordinate radius search. | +| `IsNull` | Filter by null/non-null state. | +| `ContainsAny` | Array/text contains at least one of the values. See [details](#containsany--containsall--containsnone). 
| +| `ContainsAll` | Array/text contains all of the values. See [details](#containsany--containsall--containsnone). | +| `ContainsNone` | Array/text contains none of the values. See [details](#containsany--containsall--containsnone). | -## Single operand (condition) +If the operator is `And` or `Or`, the operands are a list of nested `where` filters. -Each set of algebraic conditions is called an "operand". For each operand, the required properties are: -- The operator type, -- The property path, and -- The value as well as the value type. +### Path -For example, this filter will only allow objects from the class `Article` with a `wordCount` that is `GreaterThan` than `1000`. +A list of strings in [XPath](https://en.wikipedia.org/wiki/XPath#Abbreviated_syntax) style indicating the property name. -import GraphQLFiltersWhereSimple from '/_includes/code/graphql.filters.where.simple.mdx'; +For cross-references, follow the path as a list. For example, an `inPublication` reference to a `Publication` collection targeting the `name` property: `["inPublication", "Publication", "name"]`. - +### Value types -
- Expected response - -``` -{ - "data": { - "Get": { - "Article": [ - { - "title": "Anywhere but Washington: an eye-opening journey in a deeply divided nation" - }, - { - "title": "The world is still struggling to implement meaningful climate policy" - }, - ... - ] - } - } -} -``` - -
- -## Filter structure - -The `where` filter is an [algebraic object](https://en.wikipedia.org/wiki/Algebraic_structure), which takes the following arguments: - -- `Operator` (which takes one of the following values) - - `And` - - `Or` - - `Not` - - `Equal` - - `NotEqual` - - `GreaterThan` - - `GreaterThanEqual` - - `LessThan` - - `LessThanEqual` - - `Like` - - `WithinGeoRange` - - `IsNull` - - `ContainsAny` (*Only for array and text properties) - - `ContainsAll` (*Only for array and text properties) - - `ContainsNone` (*Only for array and text properties) -- `Path`: Is a list of strings in [XPath](https://en.wikipedia.org/wiki/XPath#Abbreviated_syntax) style, indicating the property name of the collection. - - If the property is a cross-reference, the path should be followed as a list of strings. For a `inPublication` reference property that refers to `Publication` collection, the path selector for `name` will be `["inPublication", "Publication", "name"]`. -- `valueType` - - `valueInt`: For `int` data type. - - `valueBoolean`: For `boolean` data type. - - `valueString`: For `string` data type (note: `string` has been deprecated). - - `valueText`: For `text`, `uuid`, `geoCoordinates`, `phoneNumber` data types. - - `valueNumber`: For `number` data type. - - `valueDate`: For `date` (ISO 8601 timestamp, formatted as [RFC3339](https://datatracker.ietf.org/doc/rfc3339/)) data type. - -If the operator is `And` or `Or`, the operands are a list of `where` filters. +| valueType | Data types | +| --- | --- | +| `valueInt` | `int` | +| `valueBoolean` | `boolean` | +| `valueString` | `string` (deprecated) | +| `valueText` | `text`, `uuid`, `geoCoordinates`, `phoneNumber` | +| `valueNumber` | `number` | +| `valueDate` | `date` (ISO 8601 / [RFC 3339](https://datatracker.ietf.org/doc/rfc3339/) format) |
Example filter structure (GraphQL) @@ -115,74 +86,15 @@ If the operator is `And` or `Or`, the operands are a list of `where` filters.
-
- Example response - -```json -{ - "data": { - "Get": { - "Article": [ - { - "title": "Opinion | John Lennon Told Them ‘Girls Don't Play Guitar.' He Was So Wrong." - } - ] - } - }, - "errors": null -} -``` - -
- ### Filter behaviors #### Multi-word queries in `Equal` filters -The behavior for the `Equal` operator on multi-word textual properties in `where` filters depends on the `tokenization` of the property. - -See the [Schema property tokenization section](../../config-refs/collections.mdx#tokenization) for the difference between the available tokenization types. +The behavior for the `Equal` operator on multi-word textual properties depends on the `tokenization` of the property. See [tokenization](../../config-refs/collections.mdx#tokenization). #### Stopwords in `text` filters -Starting with `v1.12.0` you can configure your own [stopword lists for the inverted index](/weaviate/config-refs/indexing/inverted-index.mdx#stopwords). - -## Multiple operands - -You can set multiple operands or [nest conditions](../../search/filters.md#nested-filters). - -:::tip -You can filter datetimes similarly to numbers, with the `valueDate` given as `string` in [RFC3339](https://datatracker.ietf.org/doc/rfc3339/) format. -::: - -import GraphQLFiltersWhereOperands from '/_includes/code/graphql.filters.where.operands.mdx'; - - - -
- Expected response - -```json -{ - "data": { - "Get": { - "Article": [ - { - "title": "China\u2019s long-distance lorry drivers are unsung heroes of its economy" - }, - { - "title": "\u2018It\u2019s as if there\u2019s no Covid\u2019: Nepal defies pandemic amid a broken economy" - }, - { - "title": "A tax hike threatens the health of Japan\u2019s economy" - } - ] - } - } -} -``` - -
+You can configure your own [stopword lists](/weaviate/config-refs/indexing/inverted-index.mdx#stopwords). ## Filter operators @@ -196,38 +108,6 @@ The `Like` operator filters `text` data based on partial matches. It can be used - `car*` matches `car`, `care`, `carpet`, etc - `*car*` matches `car`, `healthcare`, etc. -import GraphQLFiltersWhereLike from '/_includes/code/graphql.filters.where.like.mdx'; - - - -
- Expected response - -```json -{ - "data": { - "Get": { - "Publication": [ - { - "name": "The New York Times Company" - }, - { - "name": "International New York Times" - }, - { - "name": "New York Times" - }, - { - "name": "New Yorker" - } - ] - } - } -} -``` - -
- #### Performance of `Like` Each `Like` filter iterates over the entire inverted index for that property. The search time will go up linearly with the dataset size, and may become slow for large datasets. @@ -245,10 +125,9 @@ Both operators expect an array of values and return objects that match based on :::note `ContainsAny`/`ContainsAll`/`ContainsNone` notes: - The `ContainsAny`, `ContainsAll` and `ContainsNone` operators treat texts as an array. The text is split into an array of tokens based on the chosen tokenization scheme, and the search is performed on that array. -- When using `ContainsAny`, `ContainsAll` and `ContainsNone` with the REST api for [batch deletion](../../manage-objects/delete.mdx#delete-multiple-objects), the text array must be specified with the `valueTextArray` argument. This is different from the usage in search, where the `valueText` argument that can be used. +- When using `ContainsAny`, `ContainsAll` and `ContainsNone` with the REST API for [batch deletion](../../manage-objects/delete.mdx#delete-multiple-objects), the text array must be specified with the `valueTextArray` argument. This is different from the usage in search, where the `valueText` argument can be used. ::: - #### `ContainsAny` `ContainsAny` returns objects where at least one of the values from the input array is present. @@ -279,90 +158,24 @@ import RangeFilterPerformanceNote from '/_includes/range-filter-performance-note ### By id -You can filter object by their unique id or uuid, where you give the `id` as `valueText`. - -import GraphQLFiltersWhereId from '/_includes/code/graphql.filters.where.id.mdx'; - - - -
- Expected response - -```json -{ - "data": { - "Get": { - "Article": [ - { - "title": "Backs on the rack - Vast sums are wasted on treatments for back pain that make it worse" - } - ] - } - } -} -``` - -
+You can filter objects by their unique id or uuid, where you give the `id` as `valueText`. ### By timestamps Filtering can be performed with internal timestamps as well, such as `creationTimeUnix` and `lastUpdateTimeUnix`. These values can be represented either as Unix epoch milliseconds, or as [RFC3339](https://datatracker.ietf.org/doc/rfc3339/) formatted datetimes. Note that epoch milliseconds should be passed in as a `valueText`, and an RFC3339 datetime should be a `valueDate`. :::info -Filtering by timestamp requires the target class to be configured to index timestamps. See [here](/weaviate/config-refs/indexing/inverted-index.mdx#indextimestamps) for details. +Filtering by timestamp requires the target class to be configured to index timestamps. See [here](/weaviate/config-refs/indexing/inverted-index.mdx#indextimestamps) for details. ::: -import GraphQLFiltersWhereTimestamps from '/_includes/code/graphql.filters.where.timestamps.mdx'; - - - -
- Expected response - -```json -{ - "data": { - "Get": { - "Article": [ - { - "title": "Army builds new body armor 14-times stronger in the face of enemy fire" - }, - ... - ] - } - } -} -``` - -
- ### By property length -Filtering can be performed with the length of properties. - -The length of properties is calculated differently depending on the type: -- array types: the number of entries in the array is used, where null (property not present) and empty arrays both have the length 0. -- strings and texts: the number of characters (unicode characters such as 世 count as one character). -- numbers, booleans, geo-coordinates, phone-numbers and data-blobs are not supported. - -```graphql -{ - Get { - ( - where: { - operator: , - valueInt: , - path: ["len()"] - } - ) - } -} -``` -Supported operators are `(not) equal` and `greater/less than (equal)` and values need to be 0 or larger. - -Note that the `path` value is a string, where the property name is wrapped in `len()`. For example, to filter for objects based on the length of the `title` property, you would use `path: ["len(title)"]`. +Filter by the length of a property by wrapping the property name in `len()` in the path, for example `path: ["len(title)"]`. Supported operators: `Equal`, `NotEqual`, `GreaterThan`, `GreaterThanEqual`, `LessThan`, `LessThanEqual`. Values must be 0 or larger. -To filter for `Article` class objects with `title` length greater than 10, you would use: +Length calculation: +- **Array types**: number of entries (null and empty = 0). +- **Strings/texts**: number of Unicode characters. +- Numbers, booleans, geo-coordinates, phone-numbers, and data-blobs are not supported. ```graphql { @@ -379,138 +192,20 @@ To filter for `Article` class objects with `title` length greater than 10, you w ``` :::note -Filtering by property length requires the target class to be [configured to index the length](/weaviate/config-refs/indexing/inverted-index.mdx#indexpropertylength). +Filtering by property length requires [indexing to be enabled](/weaviate/config-refs/indexing/inverted-index.mdx#indexpropertylength). ::: ### By cross-references -You can also search for the value of the property of a cross-references, also called beacons.
- -For example, these filters select based on the class Article but who have `inPublication` set to New Yorker. - -import GraphQLFiltersWhereBeacon from '/_includes/code/graphql.filters.where.beacon.mdx'; - - - -
- Expected response - -```json -{ - "data": { - "Get": { - "Article": [ - { - "inPublication": [ - { - "name": "New Yorker" - } - ], - "title": "The Hidden Costs of Automated Thinking" - }, - { - "inPublication": [ - { - "name": "New Yorker" - } - ], - "title": "The Real Deal Behind the U.S.\u2013Iran Prisoner Swap" - }, - ... - ] - } - } -} -``` - -
+You can filter based on the value of a property of a cross-referenced object. The path should follow the cross-reference chain, e.g. `["inPublication", "Publication", "name"]`. ### By count of reference -Above example shows how filter by reference can solve straightforward questions like "Find all articles that are published by New Yorker". But questions like "Find all articles that are written by authors that wrote at least two articles", cannot be answered by the above query structure. It is however possible to filter by reference count. To do so, simply provide one of the existing compare operators (`Equal`, `LessThan`, `LessThanEqual`, `GreaterThan`, `GreaterThanEqual`) and use it directly on the reference element. For example: - -import GraphQLFiltersWhereBeaconCount from '/_includes/code/graphql.filters.where.beacon.count.mdx'; - - - -
- Expected response - -```json -{ - "data": { - "Get": { - "Author": [ - { - "name": "Agam Shah", - "writesFor": [ - { - "name": "Wall Street Journal" - }, - { - "name": "Wall Street Journal" - } - ] - }, - { - "name": "Costas Paris", - "writesFor": [ - { - "name": "Wall Street Journal" - }, - { - "name": "Wall Street Journal" - } - ] - }, - ... - ] - } - } -} -``` - -
+You can filter by reference count using comparison operators (`Equal`, `LessThan`, `LessThanEqual`, `GreaterThan`, `GreaterThanEqual`) directly on the reference property. For example, to find all authors that have at least two `writesFor` references, filter on the path `["writesFor"]` with `GreaterThanEqual` and `valueInt: 2`. ### By geo coordinates -A special case of the `Where` filter is with geoCoordinates. This filter is only supported by the `Get{}` function. If you've set the `geoCoordinates` property type, you can search in an area based on kilometers. - -For example, this curious returns all in a radius of 2KM around a specific geo-location: - -import GraphQLFiltersWhereGeocoords from '/_includes/code/graphql.filters.where.geocoordinates.mdx'; - - - -
- Expected response - -```json -{ - "data": { - "Get": { - "Publication": [ - { - "headquartersGeoLocation": { - "latitude": 51.512737, - "longitude": -0.0962234 - }, - "name": "Financial Times" - }, - { - "headquartersGeoLocation": { - "latitude": 51.512737, - "longitude": -0.0962234 - }, - "name": "International New York Times" - } - ] - } - } -} -``` - -
+The `WithinGeoRange` operator filters objects within a radius from a point. It requires a `geoCoordinates` property with `latitude` and `longitude`, and a `distance` with `max` in meters. Note that `geoCoordinates` uses a vector index under the hood. @@ -520,7 +215,7 @@ import GeoLimitations from '/_includes/geo-limitations.mdx'; ### By null state -Using the `IsNull` operator allows you to do filter for objects where given properties are `null` or `not null`. Note that zero-length arrays and empty strings are equivalent to a null value. +Using the `IsNull` operator allows you to filter for objects where given properties are `null` or `not null`. Note that zero-length arrays and empty strings are equivalent to a null value. ```graphql { @@ -534,15 +229,12 @@ Using the `IsNull` operator allows you to do filter for objects where given prop ``` :::note -Filtering by null-state requires the target class to be configured to index this. See [here](../../config-refs/indexing/inverted-index.mdx#indexnullstate) for details. +Filtering by null state requires [indexing to be enabled](../../config-refs/indexing/inverted-index.mdx#indexnullstate). ::: +## Further resources -## Related pages - -- [How-to search: Filters](../../search/filters.md) - - +- [How-to: Filters](../../search/filters.md) ## Questions and feedback diff --git a/docs/weaviate/api/graphql/get.md b/docs/weaviate/api/graphql/get.md index d00de555..1a090ea3 100644 --- a/docs/weaviate/api/graphql/get.md +++ b/docs/weaviate/api/graphql/get.md @@ -8,13 +8,8 @@ image: og/docs/api.jpg import FilteredTextBlock from '@site/src/components/Documentation/FilteredTextBlock'; -import TryEduDemo from '/_includes/try-on-edu-demo.mdx'; - - - This page covers object-level query functions. They are collectively referred to as `Get` queries within. - ### Parameters A `Get` query requires the target collection to be specified. @@ -28,22 +23,21 @@ A `Get` query requires the target collection to be specified.
Each `Get` query can include any of the following types of arguments: -| Argument | Description | Required | -| -------- | ----------- | -------- | -| Collection | Also called "class". The object collection to be retrieved from. | Yes | -| Properties | Properties to be retrieved | Yes (GraphQL)
(No if using gRPC API) | -| Cross-references | Cross-references to be retrieved | No | -| [Metadata](./additional-properties.md) | Metadata (additional properties) to be retrieved | No | -| [Conditional filters](./filters.md) | Filter the objects to be retrieved | No | -| [Search operators](./search-operators.md) | Specify the search strategy (e.g. near text, hybrid, bm25) | No | -| [Additional operators](./additional-operators.md) | Specify additional operators (e.g. limit, offset, sort) | No | -| [Tenant name](#multi-tenancy) | Specify the tenant name | Yes, if multi-tenancy enabled. ([Read more: what is multi-tenancy?](../../concepts/data.md#multi-tenancy)) | -| [Consistency level](#consistency-levels) | Specify the consistency level | No | - +| Argument | Description | Required | +| ------------------------------------------------- | ---------------------------------------------------------------- | ---------------------------------------------------------------------------------------------------------- | +| Collection | Also called "class". The object collection to be retrieved from. | Yes | +| Properties | Properties to be retrieved | Yes (GraphQL)
(No if using gRPC API) | +| Cross-references | Cross-references to be retrieved | No | +| [Metadata](./additional-properties.md) | Metadata (additional properties) to be retrieved | No | +| [Conditional filters](./filters.md) | Filter the objects to be retrieved | No | +| [Search operators](./search-operators.md) | Specify the search strategy (e.g. near text, hybrid, bm25) | No | +| [Additional operators](./additional-operators.md) | Specify additional operators (e.g. limit, offset, sort) | No | +| [Tenant name](#multi-tenancy) | Specify the tenant name | Yes, if multi-tenancy enabled. ([Read more: what is multi-tenancy?](../../concepts/data.md#multi-tenancy)) | +| [Consistency level](#consistency-levels) | Specify the consistency level | No | #### Example usage -import GraphQLGetSimple from '/_includes/code/graphql.get.simple.mdx'; +import GraphQLGetSimple from '/_includes/code/graphql.get.simple.mdx'; @@ -77,20 +71,18 @@ Accordingly, such a `Get` query is not suitable for a substantive object retriev :::tip Read more -- [How-to search: Basics](../../search/basics.md) -::: -### `Get` groupBy +- [How-to: Query & Search - Search patterns and basics](../../search/basics.md) +::: -You can use retrieve groups of objects that match the query. +### `Get` groupBy -The groups are defined by a property, and the number of groups and objects per group can be limited. +You can group objects that match a query by property. The number of groups and objects per group can be limited. -import GroupbyLimitations from '/_includes/groupby-limitations.mdx'; +import GroupbyLimitations from '/_includes/groupby-limitations.mdx'; - #### Syntax ```graphql @@ -126,43 +118,28 @@ import GroupbyLimitations from '/_includes/groupby-limitations.mdx'; } ``` -#### Example usage: - - -import GraphQLGroupBy from '/_includes/code/graphql.get.groupby.mdx'; - - - +For usage examples, see [How-to: Query & Search - Group results by property](../../search/basics.md#group-results-by-property).
### Consistency levels -Where replication is enabled, you can specify a `consistency` argument with a `Get` query. The available options are: -- `ONE` -- `QUORUM` (Default) -- `ALL` - -Read more about consistency levels [here](../../concepts/replication-architecture/consistency.md). - -import GraphQLGetConsistency from '/_includes/code/graphql.get.consistency.mdx'; - - +Where replication is enabled, you can specify a `consistency` argument. Options: `ONE`, `QUORUM` (default), `ALL`. Read more about [consistency levels](../../concepts/replication-architecture/consistency.md). ### Multi-tenancy In a multi-tenancy collection, each `Get` query must specify a tenant. -import GraphQLGetMT from '/_includes/code/graphql.get.multitenancy.mdx'; +import GraphQLGetMT from '/_includes/code/graphql.get.multitenancy.mdx'; - :::tip Read more + - [How-to manage data: Multi-tenancy operations](../../manage-collections/multi-tenancy.mdx) -::: +::: ## Cross-references -import CrossReferencePerformanceNote from '/_includes/cross-reference-performance-note.mdx'; +import CrossReferencePerformanceNote from '/_includes/cross-reference-performance-note.mdx'; @@ -170,11 +147,11 @@ Weaviate supports cross-references between objects. Each cross-reference behaves You can retrieve cross-referenced properties with a `Get` query. -import GraphQLGetBeacon from '/_includes/code/graphql.get.beacon.mdx'; +import GraphQLGetBeacon from '/_includes/code/graphql.get.beacon.mdx'; -import GraphQLGetBeaconUnfiltered from '!!raw-loader!/_includes/code/graphql.get.beacon.v3.py'; +import GraphQLGetBeaconUnfiltered from '!!raw-loader!/_includes/code/graphql.get.beacon.v3.py';
Expected response @@ -189,60 +166,62 @@ import GraphQLGetBeaconUnfiltered from '!!raw-loader!/_includes/code/graphql.get
:::tip Read more -- [How-to retrieve cross-referenced properties](../../search/basics.md#retrieve-cross-referenced-properties) -::: + +- [How-to: Query & Search - Retrieve cross-referenced properties](../../search/basics.md#retrieve-cross-referenced-properties) + ::: ## Additional properties / metadata Various metadata properties may be retrieved with `Get{}` requests. They include: -Property | Description | --------- | ----------- | -`id` | Object id | -`vector` | Object vector | -`generate` | Generative module outputs | -`rerank` | Reranker module outputs | -`creationTimeUnix` | Object creation time | -`lastUpdateTimeUnix` | Object last updated time | -`distance` | Vector distance to query (vector search only) | -`certainty` | Vector distance to query, normalized to certainty (vector search only) | -`score` | Search score (BM25 and hybrid only) | -`explainScore` | Explanation of the score (BM25 and hybrid only) | -`classification` | Classification outputs | -`featureProjection` | Feature projection outputs | +| Property | Description | +| -------------------- | ---------------------------------------------------------------------- | +| `id` | Object id | +| `vector` | Object vector | +| `generate` | Generative module outputs | +| `rerank` | Reranker module outputs | +| `creationTimeUnix` | Object creation time | +| `lastUpdateTimeUnix` | Object last updated time | +| `distance` | Vector distance to query (vector search only) | +| `certainty` | Vector distance to query, normalized to certainty (vector search only) | +| `score` | Search score (BM25 and hybrid only) | +| `explainScore` | Explanation of the score (BM25 and hybrid only) | +| `classification` | Classification outputs | +| `featureProjection` | Feature projection outputs | They are returned through the `_additional` properties in the response. 
For further information see: :::tip Read more -- [References: GraphQL: Additional properties](./additional-properties.md) -- [How-to search: Specify fetched properties](../../search/basics.md#retrieve-object-properties) -::: +- [References: GraphQL: Additional properties](./additional-properties.md) +- [How-to: Query & Search: Specify fetched properties](../../search/basics.md#retrieve-object-properties) + ::: ## Search operators The following search operators are available. -| Argument | Description | Required integration type | Learn more | -| --- | --- | --- | --- | -| `nearObject` | Vector search using a Weaviate object | *none* | [Learn more](./search-operators.md#nearobject) | -| `nearVector` | Vector search using a raw vector | *none* | [Learn more](./search-operators.md#nearvector) | -| `nearText` | Vector search using a text query | Text embedding model | | -| `nearImage` | Vector search using an image | Multi-modal embedding model | -| `hybrid` | Combine vector and BM25 search results | *none* | [Learn more](../graphql/search-operators.md#hybrid) | -| `bm25` | Keyword search with BM25F ranking | *none* | [Learn more](../graphql/search-operators.md#bm25) | +| Argument | Description | Required integration type | Learn more | +| ------------ | -------------------------------------- | --------------------------- | --------------------------------------------------- | +| `nearObject` | Vector search using a Weaviate object | _none_ | [Learn more](./search-operators.md#nearobject) | +| `nearVector` | Vector search using a raw vector | _none_ | [Learn more](./search-operators.md#nearvector) | +| `nearText` | Vector search using a text query | Text embedding model | | +| `nearImage` | Vector search using an image | Multi-modal embedding model | +| `hybrid` | Combine vector and BM25 search results | _none_ | [Learn more](../graphql/search-operators.md#hybrid) | +| `bm25` | Keyword search with BM25F ranking | _none_ | [Learn 
more](../graphql/search-operators.md#bm25) | For further information see: :::tip Read more + - [References: GraphQL: Search operators](./search-operators.md) -- [How-to search: Similarity search](../../search/similarity.md) -- [How-to search: Image search](../../search/image.md) -- [How-to search: BM25 search](../../search/bm25.md) -- [How-to search: Hybrid search](../../search/hybrid.md) -::: +- [How-to: Query & Search: Similarity search](../../search/similarity.md) +- [How-to: Query & Search: Image search](../../search/image.md) +- [How-to: Query & Search: BM25 search](../../search/bm25.md) +- [How-to: Query & Search: Hybrid search](../../search/hybrid.md) +::: ## Conditional filters @@ -251,10 +230,10 @@ For further information see: For further information see: :::tip Read more -- [References: GraphQL: Conditional Filters](./filters.md) -- [How-to search: Filters](../../search/filters.md) -::: +- [References: GraphQL: Conditional Filters](./filters.md) +- [How-to: Query & Search: Filters](../../search/filters.md) +::: ## Additional operators @@ -263,16 +242,20 @@ For further information see: For further information see: :::tip Read more -- [References: GraphQL: Additional Operators](./additional-operators.md) -::: +- [References: GraphQL: Additional Operators](./additional-operators.md) +::: -## Related pages -- [How-to: Search: Basics](../../search/basics.md) +## Further resources +- [How-to: Query & Search - Search patterns and basics](../../search/basics.md) +- [How-to: Query & Search - Similarity search](../../search/similarity.md) +- [How-to: Query & Search - Image search](../../search/image.md) +- [How-to: Query & Search - BM25 search](../../search/bm25.md) +- [How-to: Query & Search - Hybrid search](../../search/hybrid.md) ## Questions and feedback -import DocsFeedback from '/_includes/docs-feedback.mdx'; +import DocsFeedback from '/_includes/docs-feedback.mdx'; diff --git a/docs/weaviate/api/graphql/index.md b/docs/weaviate/api/graphql/index.md index
78b1a9be..b17f9715 100644 --- a/docs/weaviate/api/graphql/index.md +++ b/docs/weaviate/api/graphql/index.md @@ -1,24 +1,20 @@ --- -title: Search (GraphQL | gRPC) -sidebar_position: 0 +title: Search API (GraphQL/gRPC) description: "GraphQL and gRPC API documentation for flexible querying and data retrieval in Weaviate." image: og/docs/api.jpg # tags: ['GraphQL references'] --- -## API +## Querying & search API -Weaviate offers [GraphQL](https://graphql.org/) and gRPC APIs for queries. +Weaviate offers [GraphQL](https://graphql.org/) and [gRPC](https://grpc.io/) APIs for queries. We recommend using a Weaviate [client library](../../client-libraries/index.mdx), which abstracts away the underlying API calls and makes it easier to integrate Weaviate into your application. -However, you can query Weaviate directly using GraphQL with a POST request to the `/graphql` endpoint, or write your own `gRPC` calls based on the [gRPC](../grpc.md) protobuf specification. +However, you can query Weaviate directly using GraphQL with a POST request to the [`/graphql` endpoint](/weaviate/api/rest/#tag/graphql), or write your own `gRPC` calls based on the [gRPC](../grpc.md) protobuf specification. - -## All references - -All references have their individual subpages. Click on one of the references below for more information. +Click on one of the references below for more information: - [Object-level queries](./get.md) - [Aggregate](./aggregate.md) @@ -31,8 +27,6 @@ All references have their individual subpages. Click on one of the references be ## GraphQL API -### Why GraphQL? - GraphQL is a query language built on using graph data structures. It is an efficient method of data retrieval and mutation, since it mitigates the common over-fetching and under-fetching problems of other query languages. 
:::tip GraphQL is case-sensitive @@ -79,8 +73,6 @@ GraphQL (`Get`) queries are run with a tunable [consistency level](../../concept ## gRPC API -Starting with Weaviate v1.19.0, a gRPC interface is being progressively added to Weaviate. - gRPC is a high-performance, open-source universal RPC framework that is contract-based and can be used in any environment. It is based on HTTP/2 and Protocol Buffers, and is therefore very fast and efficient. Read more about the gRPC API [here](../grpc.md). diff --git a/docs/weaviate/api/graphql/search-operators.md b/docs/weaviate/api/graphql/search-operators.md index 663c6f72..52d91c86 100644 --- a/docs/weaviate/api/graphql/search-operators.md +++ b/docs/weaviate/api/graphql/search-operators.md @@ -1,18 +1,13 @@ --- title: Search operators sidebar_position: 20 -description: "GraphQL search operators guide for advanced query construction and precise data targeting techniques." +description: "GraphQL search operators reference: variable tables, operator availability, and type definitions." image: og/docs/api.jpg # tags: ['graphql', 'search operators'] --- import SearchOperators from '/_includes/feature-notes/search-operators.mdx'; - -import TryEduDemo from '/_includes/try-on-edu-demo.mdx'; - - - This page covers the search operators that can be used in queries, such as vector search operators (`nearText`, `nearVector`, `nearObject`, etc), keyword search operator (`bm25`), hybrid search operator (`hybrid`). Only one search operator can be added to queries on the collection level. @@ -30,118 +25,51 @@ These operators are available in all Weaviate instances regardless of configurat ### Module-specific operators -Module-specific search operators are made available in certain Weaviate modules. 
- By adding relevant modules, you can use the following operators: * [nearText](#neartext) * [Multimodal search](#multimodal-search) * [ask](#ask) - ## Vector search operators -`nearXXX` operators allow you to find data objects based on their vector similarity to the query. They query can be a raw vector (`nearVector`) or an object UUID (`nearObject`). +`nearXXX` operators find data objects based on vector similarity. The query can be a raw vector (`nearVector`), an object UUID (`nearObject`), a text query (`nearText`), an image (`nearImage`), or another media input. -If the appropriate vectorizer model is enabled, a text query (`nearText`), an image (`nearImage`), or another media input may be be used as the query. +All vector search operators support `certainty` or `distance` thresholds, as well as [`limit`](./additional-operators.md#limit-argument) and [`autocut`](./additional-operators.md#autocut). -All vector search operators can be used with a `certainty` or `distance` threshold specified, as well as a [`limit` operator](./additional-operators.md#limit-argument) or an [`autocut` operator](./additional-operators.md#autocut) to specify the desired similarity or distance between the query and the results +:::tip How-to guide +For usage examples with multi-language code snippets, see [Vector similarity search](../../search/similarity.md). +::: ### nearVector `nearVector` finds data objects closest to an input vector. -#### Variables - | Variable | Required | Type | Description | | --- | --- | --- | --- | -| `vector` | yes | `[float]` | This variable takes a vector embedding in the form of an array of floats. The array should have the same length as the vectors in this collection. | +| `vector` | yes | `[float]` | An array of floats matching the collection vector length. | | `distance` | no | `float` | The maximum allowed distance to the provided search input. Cannot be used together with the `certainty` variable. 
The interpretation of the value of the distance field depends on the [distance metric used](/weaviate/config-refs/distances.md). | | `certainty` | no | `float` | Normalized Distance between the result item and the search vector. Normalized to be between 0 (perfect opposite) and 1 (identical vectors). Can't be used together with the `distance` variable. | -#### Example - -import GraphQLFiltersNearVector from '/_includes/code/graphql.filters.nearVector.mdx'; - - - ### nearObject -`nearVector` finds data objects closest to an existing object in the same collection. The object is typically specified by its UUID. +`nearObject` finds data objects closest to an existing object in the same collection, specified by UUID. * Note: You can specify an object's `id` or `beacon` in the argument, along with a desired `certainty`. * Note that the first result will always be the object used for search. -#### Variables - | Variable | Required | Type | Description | -| --------- | -------- | ---- | ----------- | +| --- | --- | --- | --- | | `id` | yes | `UUID` | Data object identifier in the uuid format. | | `beacon` | no | `url` | Data object identifier in the beacon URL format. E.g., `weaviate:////id`. | | `distance` | no | `float` | The maximum allowed distance to the provided search input. Cannot be used together with the `certainty` variable. The interpretation of the value of the distance field depends on the [distance metric used](/weaviate/config-refs/distances.md). | | `certainty` | no | `float` | Normalized Distance between the result item and the search vector. Normalized to be between 0 (perfect opposite) and 1 (identical vectors). Can't be used together with the `distance` variable. | -#### Example - -import GraphQLFiltersNearObject from '/_includes/code/graphql.filters.nearObject.mdx'; - - - -
- Expected response - -```json -{ - "data": { - "Get": { - "Publication": [ - { - "_additional": { - "distance": -1.1920929e-07 - }, - "name": "The New York Times Company" - }, - { - "_additional": { - "distance": 0.059879005 - }, - "name": "New York Times" - }, - { - "_additional": { - "distance": 0.09176409 - }, - "name": "International New York Times" - }, - { - "_additional": { - "distance": 0.13954824 - }, - "name": "New Yorker" - }, - ... - ] - } - } -} -``` - -
- - ### nearText -The `nearText` operator finds data objects based on their vector similarity to a natural language query. - -This operator is enabled if a compatible vectorizer module is configured for the collection. Compatible vectorizer modules are: - -* Any `text2vec` module -* Any `multi2vec` module - - -#### Variables +`nearText` finds data objects based on vector similarity to a natural language query. Requires a compatible vectorizer module (`text2vec` or `multi2vec`). | Variable | Required | Type | Description | | --- | --- | --- | --- | @@ -158,58 +86,11 @@ This operator is enabled if a compatible vectorizer module is configured for the | `moveAwayFrom{objects}`| no | `[UUID]` | Object IDs to move the results from. This is used to "bias" NLP search results into a certain direction in vector space. | | `moveAwayFrom{force}`| no | `float` | The force to apply to a particular movement. Must be between 0 and 1 where 0 is equivalent to no movement and 1 is equivalent to largest movement possible. | -#### Example I - -This example shows an example usage the `nearText` operator, including how to bias results towards another search query. - -import GraphQLFiltersNearText from '/_includes/code/graphql.filters.nearText.mdx'; - - - -#### Example II +For `moveTo`/`moveAwayFrom` usage examples, see [Bias results with moveTo / moveAwayFrom](../../search/similarity.md#bias-results-with-moveto--moveawayfrom). -You can also bias results toward other data objects. For example, in this query, we move our query about "travelling in asia", towards an article on food. +#### Concept parsing -import GraphQLFiltersNearText2Obj from '/_includes/code/graphql.filters.nearText.2obj.mdx'; - - - -
- Expected response - -```json -{ - "data": { - "Get": { - "Article": [ - { - "_additional": { - "certainty": 0.9619976580142975 - }, - "summary": "We've scoured the planet for what we think are 50 of the most delicious foods ever created. A Hong Kong best food, best enjoyed before cholesterol checks. When you have a best food as naturally delicious as these little fellas, keep it simple. Courtesy Matt@PEK/Creative Commons/FlickrThis best food Thai masterpiece teems with shrimp, mushrooms, tomatoes, lemongrass, galangal and kaffir lime leaves. It's a result of being born in a land where the world's most delicious food is sold on nearly every street corner.", - "title": "World food: 50 best dishes" - }, - { - "_additional": { - "certainty": 0.9297388792037964 - }, - "summary": "The look reflects the elegant ambiance created by interior designer Joyce Wang in Hong Kong, while their mixology program also reflects the original venue. MONO Hong Kong , 5/F, 18 On Lan Street, Central, Hong KongKoral, The Apurva Kempinski Bali, IndonesiaKoral's signature dish: Tomatoes Bedugul. Esterre at Palace Hotel TokyoLegendary French chef Alain Ducasse has a global portfolio of restaurants, many holding Michelin stars. John Anthony/JW Marriott HanoiCantonese cuisine from Hong Kong is again on the menu, this time at the JW Marriott in Hanoi. Stanley takes its name from the elegant Hong Kong waterside district and the design touches reflect this legacy with Chinese antiques.", - "title": "20 best new Asia-Pacific restaurants to try in 2020" - } - ... - ] - } - } -} -``` - -
- -#### Additional information - -##### Concept parsing - -A `nearText` query will interpret each term in an array input as distinct strings to be vectorized. If multiple strings are passed, the query vector will be an average vector of the individual string vectors. +A `nearText` query interprets each term in the array input as a distinct string to be vectorized. If multiple strings are passed, the query vector will be an average vector of the individual string vectors. - `["New York Times"]` = one vector position is determined based on the occurrences of the words - `["New", "York", "Times"]` = all concepts have a similar weight. @@ -217,11 +98,11 @@ A `nearText` query will interpret each term in an array input as distinct string A practical example would be: `concepts: ["beatles", "John Lennon"]` -##### Semantic Path +#### Semantic Path -* Only available in `txt2vec-contextionary` module +*Only available with the `text2vec-contextionary` module.* -The semantic path returns an array of concepts from the query to the data object. This allows you to see which steps Weaviate took and how the query and data object are interpreted. +The semantic path returns an array of concepts from the query to the data object, showing which steps Weaviate took and how the query and data object are interpreted. | Property | Description | | --- | --- | @@ -233,39 +114,33 @@ The semantic path returns an array of concepts from the query to the data object _Note: Building a semantic path is only possible if a [`nearText: {}` operator](#neartext) is set as the explore term represents the beginning of the path and each search result represents the end of the path. Since `nearText: {}` queries are currently exclusively possible in GraphQL, the `semanticPath` is therefore not available in the REST API._ -Example: showing a semantic path without edges. 
- -import GraphQLUnderscoreSemanticpath from '/_includes/code/graphql.underscoreproperties.semanticpath.mdx'; - - - - ### Multimodal search -Depending on the vectorizer module, you can use additional modalities such as images, audio, or video as the query, and retrieve corresponding, compatible objects. +Depending on the vectorizer module, you can use additional modalities such as images, audio, or video as the query. Some modules, such as `multi2vec-clip` and `multi2vec-bind` allow you to search across modalities. For example, you can search for images using a text query, or search for text using an image query. -For more information, see specific module pages such as these: - -* [Transformers multimodal embeddings](../../model-providers/transformers/embeddings-multimodal.md) -* [ImageBind multimodal embeddings](../../model-providers/imagebind/embeddings-multimodal.md) +For more information on which modules support multimodal search, see the [Model provider integrations](../../model-providers/index.md#model-provider-integrations) page. ## hybrid -This operator allows you to combine [BM25](#bm25) and vector search to get a "best of both worlds" type search results set. +Combines [BM25](#bm25) and vector search results using a fusion algorithm. + +:::tip How-to guide +For usage examples, fusion algorithm details, and multi-language code snippets, see [Hybrid search](../../search/hybrid.md). 
+::: ### Variables -| Variables | Required | Type | Description | +| Variable | Required | Type | Description | |--------------|----------|------------|-----------------------------------------------------------------------------| -| `query` | yes | `string` | search query | -| `alpha` | no | `float` | weighting for each search algorithm, default 0.75 | -| `vector` | no | `[float]` | optional to supply your own vector | -| `properties` | no | `[string]` | list of properties to limit the BM25 search to, default all text properties | -| `fusionType` | no | `string` | the type of hybrid fusion algorithm (available from `v1.20.0`) | -| `bm25SearchOperator` | no | `object` | set how many of the (bm25) query tokens must be present in the target object for it to be considered a match. (available from `v1.31.0`) | +| `query` | yes | `string` | Search query. | +| `alpha` | no | `float` | Weighting for each search algorithm, default 0.75. | +| `vector` | no | `[float]` | Optional custom vector. | +| `properties` | no | `[string]` | List of properties to limit the BM25 search to. Default: all text properties. | +| `fusionType` | no | `string` | `rankedFusion` or `relativeScoreFusion` (v1.20.0+). | +| `bm25SearchOperator` | no | `object` | Token match requirements for the BM25 portion (v1.31.0+). | * Notes: * `alpha` can be any number from 0 to 1, defaulting to 0.75. @@ -282,148 +157,13 @@ This operator allows you to combine [BM25](#bm25) and vector search to get a "be The `rankedFusion` algorithm is Weaviate's original hybrid fusion algorithm. -In this algorithm, each object is scored according to its position in the results for that search (vector or keyword). The top-ranked objects in each search get the highest scores. Scores decrease going from top to least ranked. The total score is calculated by adding the rank-based scores from the vector and keyword searches. 
+In this algorithm, each object is scored according to its position in the results for that search (vector or keyword). The top-ranked objects in each search get the highest scores. Scores decrease going from top to least ranked. The total score is calculated by adding the rank-based scores from the vector and keyword searches. The score is equal to `1/(RANK + 60)`. #### Relative score fusion In `relativeScoreFusion` the vector search and keyword search scores are scaled between `0` and `1`. The highest raw score becomes `1` in the scaled scores. The lowest value is assigned `0`. The remaining values are ranked between `0` and `1`. The total score is a scaled sum of the normalized vector similarity and normalized BM25 scores. -
- Fusion scoring comparison - -This example uses a small search result set to compare the ranked fusion and relative fusion algorithms. The table shows the following information: - -- `document id`, from 0 to 4 -- `keyword score`, sorted -- `vector search score`, sorted - - - - - - - - - - - - - - -
Search Type(id): score(id): score(id): score(id): score(id): score
Keyword(1): 5(0): 2.6(2): 2.3(4): 0.2(3): 0.09
Vector(2): 0.6(4): 0.598(0): 0.596(1): 0.594(3): 0.009
- -The ranking algorithms use these scores to derive the hybrid ranking. - -#### Ranked fusion - -The score depends on the rank of the result. The score is equal to `1/(RANK + 60)`: - - - - - - - - - - - - - - -
Search Type(id): score(id): score(id): score(id): score(id): score
Keyword(1): 0.0154(0): 0.0160(2): 0.0161(4): 0.0167(3): 0.0166
Vector(2): 0.016502(4): 0.016502(0): 0.016503(1): 0.016503(3): 0.016666
- -As you can see, the results of each rank is identical, regardless of the input score. - -#### Relative score fusion - -Here, we normalize the scores – the largest score is set to 1 and the lowest to 0, and all entries in-between are scaled according to their **relative distance** to the **maximum** and **minimum values**. - - - - - - - - - - - - - - -
Search Type(id): score(id): score(id): score(id): score(id): score
Keyword(1): 1.0(0): 0.511(2): 0.450(4): 0.022(3): 0.0
Vector(2): 1.0(4): 0.996(0): 0.993(1): 0.986(3): 0.0
- -Here, the scores reflect the relative distribution of the original scores. For example, the vector search scores of the first 4 documents were almost identical, which is still the case for the normalized scores. - -#### Weighting & final scores - -Before adding these scores up, they are weighted according to the alpha parameter. Let’s assume `alpha=0.5`, meaning both search types contribute equally to the final result and therefore each score is multiplied by 0.5. - -Now, we can add the scores for each document up and compare the results from both fusion algorithms. - - - - - - - - - - - - - - -
Algorithm Type(id): score(id): score(id): score(id): score(id): score
Ranked(2): 0.016301(1): 0.015952(0): 0.015952(4): 0.016600(3): 0.016630
Relative(1): 0.993(0): 0.752(2): 0.725(4): 0.509(3): 0.0
- -#### What can we learn from this? - -For the vector search, the scores for the top 4 objects (**IDs 2, 4, 0, 1**) were almost identical, and all of them were good results. While for the keyword search, one object (**ID 1**) was much better than the rest. - -This is captured in the final result of `relativeScoreFusion`, which identified the object **ID 1** the top result. This is justified because this document was the best result in the keyword search with a big gap to the next-best score and in the top group of vector search. - -In contrast, for `rankedFusion`, the object **ID 2** is the top result, closely followed by objects **ID 1** and **ID 0**. - -
- -For a fuller discussion of fusion methods, see [this blog post](https://weaviate.io/blog/hybrid-search-fusion-algorithms) - -### Additional metadata response - -Hybrid search results are sorted by a score, derived as a fused combination of their BM25F score and `nearText` similarity (higher is more relevant). This `score`, and additionally the `explainScore` metadata can be optionally retrieved in the response. - - -### Example - -import GraphQLFiltersHybrid from '/_includes/code/graphql.filters.hybrid.mdx'; - - - -### Example with vector specified - -You can optionally supply the vector query to the `vector` variable. This will override the `query` variable for the vector search component of the hybrid search. - -import GraphQLFiltersHybridVector from '/_includes/code/graphql.filters.hybrid.vector.mdx'; - - - -### Hybrid with a conditional filter - -A [conditional (`where`) filter](../graphql/filters.md) can be used with `hybrid`. - -import GraphQLFiltersHybridFilterExample from '/_includes/code/graphql.filters.hybrid.filter.example.mdx'; - - - - -### Specify object properties for BM25 search - -A `hybrid` operator can accept an array of strings to limit the set of properties for the BM25 component of the search. If unspecified, all text properties will be searched. - -import GraphQLFiltersHybridProperties from '/_includes/code/graphql.filters.hybrid.properties.mdx'; - - +For a fuller discussion of fusion methods, see [this blog post](https://weaviate.io/blog/hybrid-search-fusion-algorithms). ### Oversearch with `relativeScoreFusion` @@ -435,139 +175,84 @@ To mitigate this effect, Weaviate automatically performs a search with a higher -Use `bm25SearchOperator` to set how many of the query tokens must be present in the target object for it to be considered a match in the keyword (bm25) search portion of the hybrid search. This is useful when you want to ensure that only objects with a certain number of relevant keywords are returned. 
+Use `bm25SearchOperator` to set how many of the query tokens must be present in the target object for it to be considered a match in the keyword (bm25) search portion of the hybrid search. The available options are `And`, or `Or`. If `Or` is set, an additional parameter `minimumOrTokensMatch` must be specified, which defines how many of the query tokens must match for the object to be considered a match. -If not yet, the keyword search will behave as if `Or` was set with `minimumOrTokensMatch` equal to 1. +If not set, the keyword search will behave as if `Or` was set with `minimumOrTokensMatch` equal to 1. -## BM25 +### Metadata response -The `bm25` operator performs a keyword (sparse vector) search, and uses the BM25F ranking function to score the results. BM25F (**B**est **M**atch **25** with Extension to Multiple Weighted **F**ields) is an extended version of BM25 that applies the scoring algorithm to multiple fields (`properties`), producing better results. +Hybrid results include a fused `score` and optional `explainScore` metadata. -The search is case-insensitive, and case matching does not confer a score advantage. Stop words are removed. [Stemming is not supported yet](https://github.com/weaviate/weaviate/issues/2439). -### Schema configuration - -The [free parameters `k1` and `b`](https://en.wikipedia.org/wiki/Okapi_BM25#The_ranking_function) are configurable and optional. See the [schema reference](../../config-refs/indexing/inverted-index.mdx#bm25) for more details. - -### Variables -The `bm25` operator supports the following variables: +## BM25 -| Variables | Required | Description | -| --------- | -------- | ----------- | -| `query` | yes | The keyword search query. | -| `properties` | no | Array of properties (fields) to search in, defaulting to all properties in the collection. | -| `searchOperator` | no | set how many of the query tokens must be present in the target object for it to be considered a match. 
(available from `v1.31.0`) | +The `bm25` operator performs a keyword (sparse vector) search using the BM25F ranking function. The search is case-insensitive, and stop words are removed. [Stemming is not supported yet](https://github.com/weaviate/weaviate/issues/2439). -:::info Boosting properties -Specific properties can be boosted by a factor specified as a number after the caret sign, for example `properties: ["title^3", "summary"]`. +:::tip How-to guide +For usage examples and multi-language code snippets, see [Keyword search](../../search/bm25.md). ::: -### Additional metadata response - -The BM25F `score` metadata can be optionally retrieved in the response. A higher score indicates higher relevance. - -### Example query +### Variables -import GraphQLFiltersBM25 from '/_includes/code/graphql.filters.bm25.mdx'; +| Variable | Required | Description | +| --- | --- | --- | +| `query` | yes | The keyword search query. | +| `properties` | no | Properties to search in. Default: all text properties. Supports boosting (e.g. `"title^3"`). | +| `searchOperator` | no | Token match requirements (v1.31.0+). | - +### Schema configuration -
- Expected response - -```json -{ - "data": { - "Get": { - "Article": [ - { - "_additional": { - "certainty": null, - "distance": null, - "score": "3.4985464" - }, - "title": "Tim Dowling: is the dog’s friendship with the fox sweet – or a bad omen?" - } - ] - } - }, - "errors": null -} -``` +The free parameters [`k1` and `b`](https://en.wikipedia.org/wiki/Okapi_BM25#The_ranking_function) are configurable. See the [inverted index reference](../../config-refs/indexing/inverted-index.mdx#bm25). -
+### Metadata response -### BM25 with a conditional filter +The BM25F `score` metadata can be retrieved in the response. A higher score indicates higher relevance. -A [conditional (`where`) filter](../graphql/filters.md) can be used with `bm25`. - -import GraphQLFiltersBM25FilterExample from '/_includes/code/graphql.filters.bm25.filter.example.mdx'; +### Search operator - + -
- Expected response - -```json -{ - "data": { - "Get": { - "Article": [ - { - "summary": "Sometimes, the hardest part of setting a fishing record is just getting the fish weighed. A Kentucky fisherman has officially set a new record in the state after reeling in a 9.05-pound saugeye. While getting the fish in the boat was difficult, the angler had just as much trouble finding an officially certified scale to weigh it on. In order to qualify for a state record, fish must be weighed on an officially certified scale. The previous record for a saugeye in Kentucky ws an 8 pound, 8-ounce fish caught in 2019.", - "title": "Kentucky fisherman catches record-breaking fish, searches for certified scale" - }, - { - "summary": "Unpaid last month because there wasn\u2019t enough money. Ms. Hunt picks up shifts at JJ Fish & Chicken, bartends and babysits. three daughters is subsidized,and cereal fromErica Hunt\u2019s monthly budget on $12 an hourErica Hunt\u2019s monthly budget on $12 an hourExpensesIncome and benefitsRent, $775Take-home pay, $1,400Varies based on hours worked. Daycare, $600Daycare for Ms. Hunt\u2019s three daughters is subsidized, as are her electricity and internet costs. Household goods, $300Child support, $350Ms. Hunt picks up shifts at JJ Fish & Chicken, bartends and babysits to make more money.", - "title": "Opinion | What to Tell the Critics of a $15 Minimum Wage" - }, - ... - ] - } - } -} - -``` +Use `searchOperator` to set how many of the query tokens must be present in the target object for it to be considered a match. -
+The available options are `And` or `Or`. If `Or` is set, an additional parameter `minimumOrTokensMatch` must be specified, which defines how many of the query tokens must match for the object to be considered a match. -### Search operator +If not set, the keyword search will behave as if `Or` was set with `minimumOrTokensMatch` equal to 1. - +:::info Boosting properties +Specific properties can be boosted by a factor specified as a number after the caret sign, for example `properties: ["title^3", "summary"]`. +::: -Use `searchOperator` to set how many of the query tokens must be present in the target object for it to be considered a match. This is useful when you want to ensure that only objects with a certain number of relevant keywords are returned. -The available options are `And`, or `Or`. If `Or` is set, an additional parameter `minimumOrTokensMatch` must be specified, which defines how many of the query tokens must match for the object to be considered a match. +## ask (legacy) -If not yet, the keyword search will behave as if `Or` was set with `minimumOrTokensMatch` equal to 1. +:::caution Outdated +The `ask` operator is a legacy feature. For question answering over your Weaviate data, use the [Weaviate Query Agent](/agents/query) instead. +::: -## ask +
+ ask operator reference Enabled by the module: [Question Answering](/weaviate/modules/qna-transformers.md). This operator allows you to return answers to questions by running the results through a Q&A model. -### Variables +#### Variables | Variable | Required | Type | Description | -| --------- | -------- | ---- | ----------- | -| `question` | yes | `string` | The question to be answered. | -| `certainty` | no | `float` | Desired minimal certainty or confidence of answer to the question. The higher the value, the stricter the search becomes. The lower the value, the fuzzier the search becomes. If no certainty is set, any answer that could be extracted will be returned. | -| `properties` | no | `[string]` | The properties of the queries collection which contains text. If no properties are set, all are considered. | -| `rerank` | no | `boolean` | If enabled, the qna module will rerank the result based on the answer score. For example, if the 3rd result - as determined by the previous (semantic) search contained the most likely answer, result 3 will be pushed to position 1, etc. *Supported since v1.10.0* | - -### Example - -import QNATransformersAsk from '/_includes/code/qna-transformers.ask.mdx'; - - +| --- | --- | --- | --- | +| `question` | yes | `string` | The question to be answered. | +| `certainty` | no | `float` | Desired minimal certainty or confidence of answer to the question. The higher the value, the stricter the search becomes. The lower the value, the fuzzier the search becomes. If no certainty is set, any answer that could be extracted will be returned. | +| `properties` | no | `[string]` | The properties of the queries collection which contains text. If no properties are set, all are considered. | +| `rerank` | no | `boolean` | If enabled, the qna module will rerank the result based on the answer score. 
For example, if the 3rd result - as determined by the previous (semantic) search - contained the most likely answer, result 3 will be pushed to position 1, etc. | -### Additional metadata response +#### Metadata response The `answer` and a `certainty` can be retrieved. +
+ ## Questions and feedback diff --git a/docs/weaviate/search/basics.md b/docs/weaviate/search/basics.md index 429c09cb..df7db470 100644 --- a/docs/weaviate/search/basics.md +++ b/docs/weaviate/search/basics.md @@ -701,10 +701,68 @@ import QueryReplication from '/\_includes/code/replication.get.object.by.id.mdx' +## Group results by property + +You can group objects that match a query. Groups are defined by a property, and you can limit the number of groups and objects per group. This requires a vector search operator (e.g. `nearText`, `nearVector`, `nearObject`). + +import GroupbyLimitations from '/_includes/groupby-limitations.mdx'; + + + +The groupBy syntax in GraphQL: + +```graphql +{ + Get{ + ( + + groupBy:{ + path: [] # Property to group by (one property or cross-reference) + groups: # Max number of groups + objectsPerGroup: # Max objects per group + } + ) { + _additional { + group { + id + groupedBy{ value path } + count + maxDistance + minDistance + hits { + + _additional { + id + vector + distance + } + } + } + } + } + } +} +``` + +For client code examples, see [Vector similarity: Group results](./similarity.md#group-results), [Hybrid: Group results](./hybrid.md#group-results), or [BM25: Group results](./bm25.md#group-results). + +
+ Consistency levels + +Where replication is enabled, you can specify a consistency level with queries. The available options are: +- `ONE` +- `QUORUM` (Default) +- `ALL` + +Read more about [consistency levels](../concepts/replication-architecture/consistency.md). + +
+ ## Related pages - [Connect to Weaviate](/weaviate/connections) -- [API References: GraphQL: Get](../api/graphql/get.md) +- [API reference: GraphQL: Get](../api/graphql/get.md) +- [Sort and paginate results](./sort-and-paginate.md) - For tutorials, see [Queries](/weaviate/tutorials/query.md) - For search using the GraphQL API, see [GraphQL API](../api/graphql/get.md) diff --git a/docs/weaviate/search/bm25.md b/docs/weaviate/search/bm25.md index 385167b4..739a4307 100644 --- a/docs/weaviate/search/bm25.md +++ b/docs/weaviate/search/bm25.md @@ -814,10 +814,38 @@ Set the tokenization method to `trigram` at the property level when creating you ::: +## Schema configuration + +The free parameters `k1` and `b` in the [BM25 ranking function](https://en.wikipedia.org/wiki/Okapi_BM25#The_ranking_function) are configurable per collection. See the [inverted index reference](../config-refs/indexing/inverted-index.mdx#bm25) for details. + +## Search operator (token matching) + + + +Use `searchOperator` to control how many query tokens must be present in the target object for it to be a match. The available options are: + +- **`Or`**: At least `minimumOrTokensMatch` tokens must match (default behavior with minimum of 1). +- **`And`**: All tokens must match. + +:::info Property boosting syntax +Specific properties can be boosted by a factor using the caret syntax: `properties: ["title^3", "summary"]`. This multiplies the BM25 score contribution from the boosted property. +::: + +
+ BM25 variable table + +| Variable | Required | Description | +| --- | --- | --- | +| `query` | yes | The keyword search query. | +| `properties` | no | Properties to search in. Default: all text properties. Supports boosting with caret syntax (e.g. `"title^3"`). | +| `searchOperator` | no | Token match requirements (v1.31.0+). | + +
+ ## Further resources - [Connect to Weaviate](../connections/index.mdx) -- [API References: Search operators # BM25](../api/graphql/search-operators.md#bm25) +- [API reference: Search operators # BM25](../api/graphql/search-operators.md#bm25) - [Reference: Tokenization options](../config-refs/collections.mdx#tokenization) ## Questions and feedback diff --git a/docs/weaviate/search/filters.md b/docs/weaviate/search/filters.md index 7d6cb5b8..a7bc648a 100644 --- a/docs/weaviate/search/filters.md +++ b/docs/weaviate/search/filters.md @@ -666,7 +666,7 @@ The output is like this: The `*` wildcard operator matches zero or more characters. The `?` operator matches exactly one character.
-Currently, the `Like` filter is not able to match wildcard characters (`?` and `*`) as literal characters ([read more](../api/graphql/filters.md#wildcard-literal-matches-with-like)). +Currently, the `Like` filter is not able to match wildcard characters (`?` and `*`) as literal characters. For example, it is not possible to match only the string `car*` without also matching `car`, `care`, or `carpet`. @@ -1085,12 +1085,47 @@ If you encounter slow filter performance, consider adding a `limit` parameter or ## List of filter operators -For a list of filter operators, see [the reference page](../api/graphql/filters.md#filter-structure). +The `where` filter supports the following operators: + +| Operator | Description | +| --- | --- | +| `And` | Combine multiple conditions (all must match). | +| `Or` | Combine multiple conditions (at least one must match). | +| `Not` | Negate a condition. | +| `Equal` | Exact match. | +| `NotEqual` | Inverse of `Equal`. | +| `GreaterThan` | Greater than comparison. | +| `GreaterThanEqual` | Greater than or equal comparison. | +| `LessThan` | Less than comparison. | +| `LessThanEqual` | Less than or equal comparison. | +| `Like` | Partial text match with `?` (one char) and `*` (zero+ chars) wildcards. | +| `WithinGeoRange` | Geo-coordinate radius search. | +| `IsNull` | Filter by null/non-null state. | +| `ContainsAny` | Array/text contains at least one of the values. | +| `ContainsAll` | Array/text contains all of the values. | +| `ContainsNone` | Array/text contains none of the values. 
| + +### Value types + +When specifying filter values, use the appropriate `valueType` for the property's data type: + +| valueType | Data types | +| --- | --- | +| `valueInt` | `int` | +| `valueBoolean` | `boolean` | +| `valueString` | `string` (deprecated) | +| `valueText` | `text`, `uuid`, `geoCoordinates`, `phoneNumber` | +| `valueNumber` | `number` | +| `valueDate` | `date` (ISO 8601 / [RFC 3339](https://datatracker.ietf.org/doc/rfc3339/) format) | + +### Filter path syntax + +The `path` is a list of strings in [XPath](https://en.wikipedia.org/wiki/XPath#Abbreviated_syntax) style indicating the property name. For cross-references, follow the path as a list: `["inPublication", "Publication", "name"]`. ## Related pages - [Connect to Weaviate](/weaviate/connections/index.mdx) -- [API References: Filters](../api/graphql/filters.md) +- [API reference: Filters](../api/graphql/filters.md) ## Questions and feedback diff --git a/docs/weaviate/search/hybrid.md b/docs/weaviate/search/hybrid.md index 8d0d47ba..753d16d8 100644 --- a/docs/weaviate/search/hybrid.md +++ b/docs/weaviate/search/hybrid.md @@ -894,7 +894,7 @@ To limit results to groups with similar distances from the query, use the [`auto Autocut requires `Relative Score Fusion` method because it uses actual similarity scores to detect cutoff points. Autocut shouldn't be used with `Ranked Fusion` as this fusion method relies on ranking positions, not similarity scores. -To learn more about the different fusion algorithms, visit the [search operators reference page](/weaviate/api/graphql/search-operators#fusion-algorithms). +To learn more about the different fusion algorithms, see the [Fusion algorithms](#fusion-algorithms) section below. ::: @@ -1038,10 +1038,126 @@ import TokenizationNote from '/\_includes/tokenization.mdx' +## Fusion algorithms + +Hybrid search uses a fusion algorithm to combine BM25 and vector search results. 
Two algorithms are available: + +### Ranked fusion + +The `rankedFusion` algorithm (default) scores each object by its position (rank) in each result set. Top-ranked objects get the highest scores. The total score is the sum of rank-based scores from both searches. This means the original score magnitudes are discarded — only rank order matters. + +### Relative score fusion + +The `relativeScoreFusion` algorithm normalizes the raw scores from each search to a 0–1 range, preserving the relative distribution of scores. The total score is a weighted sum of the normalized vector similarity and BM25 scores. + +
+ Fusion scoring comparison + +This example compares the two algorithms on a small result set: + + + + + + + + + + + + + + +
Search Type(id): score(id): score(id): score(id): score(id): score
Keyword(1): 5(0): 2.6(2): 2.3(4): 0.2(3): 0.09
Vector(2): 0.6(4): 0.598(0): 0.596(1): 0.594(3): 0.009
+ +**Ranked fusion** — scores depend only on rank (`1/(RANK + 60)`): + + + + + + + + + + + + + + +
Search Type(id): score(id): score(id): score(id): score(id): score
Keyword(1): 0.0154(0): 0.0160(2): 0.0161(4): 0.0167(3): 0.0166
Vector(2): 0.016502(4): 0.016502(0): 0.016503(1): 0.016503(3): 0.016666
+ +**Relative score fusion** — scores normalized to 0–1 range: + + + + + + + + + + + + + + +
Search Type(id): score(id): score(id): score(id): score(id): score
Keyword(1): 1.0(0): 0.511(2): 0.450(4): 0.022(3): 0.0
Vector(2): 1.0(4): 0.996(0): 0.993(1): 0.986(3): 0.0
+ +**Final scores** (with `alpha=0.5`): + + + + + + + + + + + + + + +
Algorithm(id): score(id): score(id): score(id): score(id): score
Ranked(2): 0.016301(1): 0.015952(0): 0.015952(4): 0.016600(3): 0.016630
Relative(1): 0.993(0): 0.752(2): 0.725(4): 0.509(3): 0.0
+ +`relativeScoreFusion` preserves the large gap in keyword scores, identifying ID 1 as the top result. `rankedFusion` only considers rank positions, yielding a flatter score distribution. + +
+ +For a fuller discussion, see [this blog post](https://weaviate.io/blog/hybrid-search-fusion-algorithms). + +:::note Oversearch with `relativeScoreFusion` +When using `relativeScoreFusion` with a small `limit`, the result set can be sensitive to the limit parameter due to score normalization. Weaviate automatically oversearches (with a higher limit of 100) and trims the results to mitigate this. +::: + +## BM25 search operator for hybrid + + + +Use `bm25SearchOperator` to control how many query tokens must be present in the target object for it to be a match in the keyword (BM25) portion of the hybrid search. The available options are: + +- **`Or`**: At least `minimumOrTokensMatch` tokens must match. This is the default operator, with a default minimum of 1. +- **`And`**: All tokens must match. + +
+ Hybrid variable table + +| Variable | Required | Type | Description | +|---|---|---|---| +| `query` | yes | `string` | Search query. | +| `alpha` | no | `float` | Weighting (0 = pure keyword, 1 = pure vector). Default 0.75. | +| `vector` | no | `[float]` | Optional custom vector. | +| `properties` | no | `[string]` | Limit BM25 to these properties. | +| `fusionType` | no | `string` | `rankedFusion` or `relativeScoreFusion` (v1.20.0+). | +| `bm25SearchOperator` | no | `object` | Token match requirements (v1.31.0+). | + +
+ ## Related pages - [Connect to Weaviate](/weaviate/connections/index.mdx) -- [API References: Search operators # Hybrid](../api/graphql/search-operators.md#hybrid) +- [API reference: Search operators # Hybrid](../api/graphql/search-operators.md#hybrid) - About [hybrid fusion algorithms](https://weaviate.io/blog/hybrid-search-fusion-algorithms). - For tutorials, see [Queries](/weaviate/tutorials/query.md) - For search using the GraphQL API, see [GraphQL API](../api/graphql/get.md). diff --git a/docs/weaviate/search/index.mdx b/docs/weaviate/search/index.mdx index 273d5a94..23f66761 100644 --- a/docs/weaviate/search/index.mdx +++ b/docs/weaviate/search/index.mdx @@ -82,6 +82,13 @@ export const searchTopicsData = [ link: "/weaviate/search/filters", icon: "fas fa-filter", }, + { + title: "Sort and paginate", + description: + "Sort results by properties, paginate with offset/limit, and iterate with cursor-based retrieval.", + link: "/weaviate/search/sort-and-paginate", + icon: "fas fa-sort-amount-down-alt", + }, ];
diff --git a/docs/weaviate/search/similarity.md b/docs/weaviate/search/similarity.md index f7a5ca8d..1b9c1ed9 100644 --- a/docs/weaviate/search/similarity.md +++ b/docs/weaviate/search/similarity.md @@ -746,9 +746,121 @@ The output is like this: +## Bias results with `moveTo` / `moveAwayFrom` + +When using `nearText`, you can bias the search direction by moving the query vector toward or away from other concepts or objects. + +For example, this query searches for "traveling in Asia" and biases results toward an article about food: + +import GraphQLFiltersNearText2Obj from '/_includes/code/graphql.filters.nearText.2obj.mdx'; + + + +
+ Expected response + +```json +{ + "data": { + "Get": { + "Article": [ + { + "_additional": { + "certainty": 0.9619976580142975 + }, + "summary": "We've scoured the planet for what we think are 50 of the most delicious foods ever created...", + "title": "World food: 50 best dishes" + }, + { + "_additional": { + "certainty": 0.9297388792037964 + }, + "summary": "The look reflects the elegant ambiance created by interior designer Joyce Wang in Hong Kong...", + "title": "20 best new Asia-Pacific restaurants to try in 2020" + } + ] + } + } +} +``` + +
+ +The `moveTo` and `moveAwayFrom` parameters each accept `concepts` (text strings), `objects` (UUIDs), and a `force` (0–1) controlling the strength of the bias. + +
+ Concept parsing + +A `nearText` query interprets each term in an array input as a distinct string to be vectorized. If multiple strings are passed, the query vector is an average of the individual string vectors. + +- `["New York Times"]` — one vector position based on the whole phrase. +- `["New", "York", "Times"]` — all concepts weighted similarly. +- `["New York", "Times"]` — a combination of the two above. + +A practical example: `concepts: ["beatles", "John Lennon"]` + +
+ +
+ Semantic path (Contextionary only) + +The semantic path returns an array of concepts from the query to the data object, showing the steps Weaviate's Contextionary took to connect them. + +| Property | Description | +| --- | --- | +| `concept` | The concept found at this step. | +| `distanceToNext` | Distance to the next step (null for the last step). | +| `distanceToPrevious` | Distance to the previous step (null for the first step). | +| `distanceToQuery` | Distance of this step to the query. | +| `distanceToResult` | Distance of this step to the result. | + +Building a semantic path requires a `nearText` operator, because the query concepts mark the start of the path and each search result marks its end. The semantic path is therefore only available in GraphQL, in queries that use `nearText`. + +
+ +
+ Variable tables for vector search operators + +#### `nearVector` variables + +| Variable | Required | Type | Description | +| --- | --- | --- | --- | +| `vector` | yes | `[float]` | Array of floats matching the collection vector length. | +| `distance` | no | `float` | Maximum allowed distance. Cannot be used with `certainty`. | +| `certainty` | no | `float` | Minimum required certainty: distance normalized to a 0–1 range, where 0 is a perfect opposite and 1 is an identical vector. Cannot be used with `distance`. | + +#### `nearObject` variables + +| Variable | Required | Type | Description | +| --- | --- | --- | --- | +| `id` | yes | `UUID` | Object identifier. | +| `beacon` | no | `url` | Beacon URL format identifier. | +| `distance` | no | `float` | Maximum allowed distance. Cannot be used with `certainty`. | +| `certainty` | no | `float` | Minimum required certainty: distance normalized to a 0–1 range, where 0 is a perfect opposite and 1 is an identical vector. Cannot be used with `distance`. | + +#### `nearText` variables + +| Variable | Required | Type | Description | +| --- | --- | --- | --- | +| `concepts` | yes | `[string]` | Natural language queries or single words. | +| `distance` | no | `float` | Maximum allowed distance. Cannot be used with `certainty`. | +| `certainty` | no | `float` | Minimum required certainty: distance normalized to a 0–1 range, where 0 is a perfect opposite and 1 is an identical vector. Cannot be used with `distance`. | +| `autocorrect` | no | `boolean` | Autocorrect input text values. Requires the `text-spellcheck` module. | +| `moveTo` | no | `object{}` | Move search toward another vector. | +| `moveTo{concepts}` | no | `[string]` | Concepts to move toward. | +| `moveTo{objects}` | no | `[UUID]` | Object IDs to move toward. | +| `moveTo{force}` | no | `float` | Movement force (0–1). | +| `moveAwayFrom` | no | `object{}` | Move search away from another vector. | +| `moveAwayFrom{concepts}` | no | `[string]` | Concepts to move away from. | +| `moveAwayFrom{objects}` | no | `[UUID]` | Object IDs to move away from. | +| `moveAwayFrom{force}` | no | `float` | Movement force (0–1). | + +
+ ## Related pages - [Connect to Weaviate](/weaviate/connections/index.mdx) +- [API reference: Search operators](../api/graphql/search-operators.md) - For image search, see [Image search](/weaviate/search/image). - For tutorials, see [Queries](/weaviate/tutorials/query.md). - For search using the GraphQL API, see [GraphQL API](/weaviate/api). diff --git a/docs/weaviate/search/sort-and-paginate.md b/docs/weaviate/search/sort-and-paginate.md new file mode 100644 index 00000000..f5b7be70 --- /dev/null +++ b/docs/weaviate/search/sort-and-paginate.md @@ -0,0 +1,399 @@ +--- +title: Sort and paginate +sidebar_position: 95 +image: og/docs/howto.jpg +description: "Sort results by properties, paginate with offset and limit, and use cursor-based iteration with code examples." +--- + +import Tabs from '@theme/Tabs'; +import TabItem from '@theme/TabItem'; +import FilteredTextBlock from '@site/src/components/Documentation/FilteredTextBlock'; +import PyCode from '!!raw-loader!/_includes/code/graphql.additional.py'; +import PyCodeV3 from '!!raw-loader!/_includes/code/graphql.additional-v3.py'; +import TSCode from '!!raw-loader!/_includes/code/graphql.additional.ts'; +import GoCode from '!!raw-loader!/_includes/code/graphql.additional.go'; +import JavaCode from '!!raw-loader!/_includes/code/graphql.additional.java'; +import CurlCode from '!!raw-loader!/_includes/code/graphql.additional.sh'; + +## Sorting + +You can sort results by any primitive property, such as `text`, `number`, or `int`. + +:::note +Sorting is available when fetching objects, but **unavailable when using search operators**. Search operators automatically rank results by relevance (e.g. distance or score). +::: + +### Sort order + +| Parameter | Required | Type | Description | +|-----------|----------|-----------------|-----------------------------------------------------------| +| `path` | yes | `text` | A single-element array containing the field name. GraphQL supports specifying the field name directly. 
| +| `order` | varies by client | `asc` or `desc` | The sort order, ascending (default) or descending. | + +#### Boolean values + +`false` is considered smaller than `true`. `false` comes before `true` in ascending order and after `true` in descending order. + +#### Null values + +`null` values are considered smaller than any non-`null` values. `null` values come first in ascending order and last in descending order. + +#### Arrays + +Arrays are compared element by element from the beginning. When an element in one array is smaller than its counterpart in the second array, the whole first array is considered smaller. + +Arrays are equal if they have the same length and all elements are equal. If one array is a prefix of another (all of its elements match the start of the longer array), the shorter array is considered smaller. + +Examples: +- `[1, 2, 3] = [1, 2, 3]` +- `[1, 2, 4] < [1, 3, 4]` +- `[2, 2] > [1, 2, 3, 4]` +- `[1, 2, 3] < [1, 2, 3, 4]` + +### Sort by a single property + + + + + + + + + + + + + + + + + + + +
+ Expected response + +```json +{ + "data": { + "Get": { + "JeopardyQuestion": [ + { + "answer": "$5 (Lincoln Memorial in the background)", + "points": 600, + "question": "A sculpture by Daniel Chester French can be seen if you look carefully on the back of this current U.S. bill" + }, + { + "answer": "(1 of 2) Juneau, Alaska or Augusta, Maine", + "points": 0, + "question": "1 of the 2 U.S. state capitals that begin with the names of months" + }, + { + "answer": "(1 of 2) Juneau, Alaska or Honolulu, Hawaii", + "points": 0, + "question": "One of the 2 state capitals whose names end with the letter \"U\"" + } + ] + } + } +} +``` + +
+ +### Sort by multiple properties + +To sort by more than one property, pass an array of { `path`, `order` } objects to the sort function: + + + + + + + + + + + + + + + + + + + +### Sort by metadata properties + +To sort with metadata, add an underscore to the property name. + +| Property Name | Sort Property Name | +| :- | :- | +| `id` | `_id` | +| `creationTimeUnix` | `_creationTimeUnix` | +| `lastUpdateTimeUnix` | `_lastUpdateTimeUnix` | + + + + + + + + + + + + + + + + + + + +
+ Python client v4 property names + +| Property Name | Sort Property Name | +| :- | :- | +| `uuid` |`_id` | +| `creation_time` | `_creationTimeUnix` | +| `last_update_time` | `_lastUpdateTimeUnix` | + +
+ +### Sorting considerations + +Weaviate's sorting implementation does not lead to massive memory spikes — only the property values being sorted are kept in memory, not all object properties. + +No sorting-specific data structures are used on disk. This works well for small-to-medium scales (hundreds of thousands to millions of objects), but can be expensive for very large collections (hundreds of millions+). + +## Pagination with `offset` + +Use `offset` and `limit` together to paginate through results. + +For example, to list the first ten results, set `limit: 10` and `offset: 0`. To display the next ten, set `offset: 10`. + +import GraphQLFiltersLimit from '/_includes/code/graphql.filters.limit.mdx'; + + + +
+ Expected response + +```json +{ + "data": { + "Get": { + "Article": [ + { + "title": "Backs on the rack - Vast sums are wasted on treatments for back pain that make it worse" + }, + { + "title": "Graham calls for swift end to impeachment trial, warns Dems against calling witnesses" + }, + { + "title": "Through a cloud, brightly - Obituary: Paul Volcker died on December 8th" + }, + { + "title": "Google Stadia Reviewed \u2013 Against The Stream" + }, + { + "title": "Managing Supply Chain Risk" + } + ] + } + } +} +``` + +
+ +import GraphQLFiltersOffset from '/_includes/code/graphql.filters.offset.mdx'; + + + +
+ Expected response + +```json +{ + "data": { + "Get": { + "Article": [ + { + "title": "Through a cloud, brightly - Obituary: Paul Volcker died on December 8th" + }, + { + "title": "Google Stadia Reviewed \u2013 Against The Stream" + }, + { + "title": "Managing Supply Chain Risk" + }, + { + "title": "Playing College Football In Madden" + }, + { + "title": "The 50 best albums of 2019, No 3: Billie Eilish \u2013 When We All Fall Asleep, Where Do We Go?" + } + ] + } + } +} +``` + +
+ +### Performance considerations + +Pagination with `offset` is not cursor-based. This has the following implications: + +- **Response time increases with the offset.** Each page request requires a new, larger call. For example, requesting results 21–30 means Weaviate retrieves 30 objects and drops the first 20. +- **Multi-shard configurations amplify resource usage.** Each shard retrieves a full list and drops the objects before the offset. With 10 shards and results 91–100, Weaviate retrieves 1000 objects (100 per shard) and drops 990. +- **A maximum result limit applies.** If `offset + limit` exceeds `QUERY_MAXIMUM_RESULTS`, Weaviate returns an error. Edit the `QUERY_MAXIMUM_RESULTS` environment variable to change this limit. +- **Pagination is not stateful.** Database changes between calls can cause pages to miss or duplicate results. If there are no writes between calls, the result set stays consistent. + +For large-scale sequential retrieval, use the [cursor API](#cursor-with-after) instead. + +## Cursor with `after` + +The `after` operator retrieves objects sequentially using a cursor based on object IDs. Unlike offset pagination, it performs consistently regardless of position in the result set. + +`after` is compatible with single-shard and multi-shard configurations, but only works with list queries. It is **not** compatible with `where` filters, vector search operators (such as `nearText` or `nearVector`), `bm25`, `hybrid`, or similar search operators. For those, use [pagination with `offset`](#pagination-with-offset). + +import GraphQLFiltersAfter from '/_includes/code/graphql.filters.after.mdx'; + + + +
+ Expected response + +```json +{ + "data": { + "Get": { + "Article": [ + { + "_additional": { + "id": "00313a4c-4308-30b0-af4a-01773ad1752b" + }, + "title": "Managing Supply Chain Risk" + }, + { + "_additional": { + "id": "0042b9d0-20e4-334e-8f42-f297c150e8df" + }, + "title": "Playing College Football In Madden" + }, + { + "_additional": { + "id": "0047c049-cdd6-3f6e-bb89-84ae20b74f49" + }, + "title": "The 50 best albums of 2019, No 3: Billie Eilish \u2013 When We All Fall Asleep, Where Do We Go?" + }, + { + "_additional": { + "id": "00582185-cbf4-3cd6-8c59-c2d6ec979282" + }, + "title": "How artificial intelligence is transforming the global battle against human trafficking" + }, + { + "_additional": { + "id": "0061592e-b776-33f9-8109-88a5bd41df78" + }, + "title": "Masculine, feminist or neutral? The language battle that has split Spain" + } + ] + } + } +} +``` + +
+ +## Related pages + +- [API reference: Additional operators](../api/graphql/additional-operators.md) +- [Query basics: `limit` returned objects](./basics.md#limit-returned-objects) +- [Query basics: Paginate with `limit` and `offset`](./basics.md#paginate-with-limit-and-offset) + +## Questions and feedback + +import DocsFeedback from '/_includes/docs-feedback.mdx'; + + diff --git a/docusaurus.config.js b/docusaurus.config.js index a0340dfb..70f19523 100644 --- a/docusaurus.config.js +++ b/docusaurus.config.js @@ -213,7 +213,7 @@ const config = { prism: { theme: prismThemes.github, darkTheme: prismThemes.dracula, - additionalLanguages: ["java", "csharp"], + additionalLanguages: ["java", "csharp", "bash"], }, docs: { sidebar: { diff --git a/sidebars.js b/sidebars.js index ffaee594..e6840529 100644 --- a/sidebars.js +++ b/sidebars.js @@ -633,6 +633,7 @@ const sidebars = { "weaviate/search/rerank", "weaviate/search/aggregate", "weaviate/search/filters", + "weaviate/search/sort-and-paginate", { type: "link", label: "Search strategies: In depth",