Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
30 changes: 24 additions & 6 deletions docs/layouts/shortcodes/generated/core_configuration.html
Original file line number Diff line number Diff line change
Expand Up @@ -152,12 +152,6 @@
<td>String</td>
<td>Fields that are ignored for comparison while generating -U, +U changelog for the same record. This configuration is only valid for the changelog-producer.row-deduplicate is true.</td>
</tr>
<tr>
<td><h5>table-read.sequence-number.enabled</h5></td>
<td style="word-wrap: break-word;">false</td>
<td>Boolean</td>
<td>Whether to include the _SEQUENCE_NUMBER field when reading the audit_log or binlog system tables. This is only valid for primary key tables.</td>
</tr>
<tr>
<td><h5>changelog.num-retained.max</h5></td>
<td style="word-wrap: break-word;">(none)</td>
Expand Down Expand Up @@ -1308,6 +1302,12 @@
<td>Duration</td>
<td>The delay duration of stream read when scan incremental snapshots.</td>
</tr>
<tr>
<td><h5>table-read.sequence-number.enabled</h5></td>
<td style="word-wrap: break-word;">false</td>
<td>Boolean</td>
<td>Whether to include the _SEQUENCE_NUMBER field when reading the audit_log or binlog system tables. This is only valid for primary key tables.</td>
</tr>
<tr>
<td><h5>tag.automatic-completion</h5></td>
<td style="word-wrap: break-word;">false</td>
Expand Down Expand Up @@ -1440,6 +1440,24 @@
<td>String</td>
<td>The Variant shredding schema for writing.</td>
</tr>
<tr>
<td><h5>vector-store.fields</h5></td>
<td style="word-wrap: break-word;">(none)</td>
<td>String</td>
<td>Specify the vector store fields.</td>
</tr>
<tr>
<td><h5>vector-store.format</h5></td>
<td style="word-wrap: break-word;">(none)</td>
<td>String</td>
<td>Specify the vector store file format.</td>
</tr>
<tr>
<td><h5>vector-store.target-file-size</h5></td>
<td style="word-wrap: break-word;">(none)</td>
<td>MemorySize</td>
<td>Target size of a vector-store file. Default is 10 * TARGET_FILE_SIZE.</td>
</tr>
<tr>
<td><h5>write-buffer-for-append</h5></td>
<td style="word-wrap: break-word;">false</td>
Expand Down
43 changes: 43 additions & 0 deletions paimon-api/src/main/java/org/apache/paimon/CoreOptions.java
Original file line number Diff line number Diff line change
Expand Up @@ -2190,6 +2190,29 @@ public InlineElement getDescription() {
.withDescription(
"Whether to try upgrading the data files after overwriting a primary key table.");

/** Option {@code vector-store.format}: string-typed file format of the vector store; no default. */
public static final ConfigOption<String> VECTOR_STORE_FORMAT =
key("vector-store.format")
.stringType()
.noDefaultValue()
.withDescription("Specify the vector store file format.");

/**
 * Option {@code vector-store.fields}: string naming the vector store fields; no default.
 *
 * <p>The value is consumed as a comma-separated list by {@code vectorStoreFieldNames()}.
 */
public static final ConfigOption<String> VECTOR_STORE_FIELDS =
key("vector-store.fields")
.stringType()
.noDefaultValue()
.withDescription("Specify the vector store fields.");

/**
 * Option {@code vector-store.target-file-size}: target size of a vector-store file.
 *
 * <p>No default is declared on the option itself; when it is unset,
 * {@code vectorStoreTargetFileSize()} falls back to {@code 10 * targetFileSize(false)}.
 */
public static final ConfigOption<MemorySize> VECTOR_STORE_TARGET_FILE_SIZE =
key("vector-store.target-file-size")
.memoryType()
.noDefaultValue()
.withDescription(
Description.builder()
.text(
"Target size of a vector-store file."
+ " Default is 10 * TARGET_FILE_SIZE.")
.build());

private final Options options;

public CoreOptions(Map<String, String> options) {
Expand Down Expand Up @@ -3407,6 +3430,26 @@ public boolean overwriteUpgrade() {
return options.get(OVERWRITE_UPGRADE);
}

/**
 * Returns the configured {@code vector-store.format} value after passing it through
 * {@code normalizeFileFormat}.
 *
 * <p>NOTE(review): the option declares no default value, so the raw value may be absent;
 * confirm that {@code normalizeFileFormat} tolerates that case.
 */
public String vectorStoreFileFormatString() {
    String configuredFormat = options.get(VECTOR_STORE_FORMAT);
    return normalizeFileFormat(configuredFormat);
}

/**
 * Returns the field names listed in the {@code vector-store.fields} option.
 *
 * <p>The option value is treated as a comma-separated list. Whitespace around each name is
 * stripped and blank entries are dropped, so {@code "a, b"} and {@code "a,b,"} both yield
 * {@code [a, b]}.
 *
 * @return a new mutable list of field names; empty when the option is unset or blank
 */
public List<String> vectorStoreFieldNames() {
    List<String> fieldNames = new ArrayList<>();
    String vectorStoreFields = options.get(CoreOptions.VECTOR_STORE_FIELDS);
    if (vectorStoreFields == null) {
        return fieldNames;
    }
    for (String rawName : vectorStoreFields.split(",")) {
        // Users commonly write "a, b"; keep the padding out of the field name.
        String fieldName = rawName.trim();
        if (!fieldName.isEmpty()) {
            fieldNames.add(fieldName);
        }
    }
    return fieldNames;
}

/**
 * Returns the target size, in bytes, of a vector-store file.
 *
 * @return the configured {@code vector-store.target-file-size} if present, otherwise ten
 *     times {@code targetFileSize(false)}
 */
public long vectorStoreTargetFileSize() {
    // Since vectors are large, it would be better to set a larger target size for vectors.
    long fallbackBytes = 10 * targetFileSize(false);
    return options.getOptional(VECTOR_STORE_TARGET_FILE_SIZE)
            .map(size -> size.getBytes())
            .orElse(fallbackBytes);
}

/** Specifies the merge engine for table with primary key. */
public enum MergeEngine implements DescribedEnum {
DEDUPLICATE("deduplicate", "De-duplicate and keep the last row."),
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -240,6 +240,11 @@ public List<DataType> visit(ArrayType arrayType) {
return Collections.singletonList(arrayType.getElementType());
}

/** Returns the vector's element type as its single child type. */
@Override
public List<DataType> visit(VectorType vectorType) {
    DataType elementType = vectorType.getElementType();
    return Collections.singletonList(elementType);
}

@Override
public List<DataType> visit(MultisetType multisetType) {
return Collections.singletonList(multisetType.getElementType());
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -124,6 +124,11 @@ public R visit(ArrayType arrayType) {
return defaultMethod(arrayType);
}

/** Handles {@link VectorType} by delegating to {@code defaultMethod}. */
@Override
public R visit(VectorType vectorType) {
return defaultMethod(vectorType);
}

@Override
public R visit(MultisetType multisetType) {
return defaultMethod(multisetType);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -78,6 +78,10 @@ public static DataType parseDataType(JsonNode json, AtomicInteger fieldId) {
if (typeString.startsWith("ARRAY")) {
DataType element = parseDataType(json.get("element"), fieldId);
return new ArrayType(!typeString.contains("NOT NULL"), element);
} else if (typeString.startsWith("VECTOR")) {
DataType element = parseDataType(json.get("element"), fieldId);
int length = json.get("length").asInt();
return new VectorType(!typeString.contains("NOT NULL"), length, element);
} else if (typeString.startsWith("MULTISET")) {
DataType element = parseDataType(json.get("element"), fieldId);
return new MultisetType(!typeString.contains("NOT NULL"), element);
Expand Down Expand Up @@ -318,6 +322,7 @@ private enum Keyword {
SECOND,
TO,
ARRAY,
VECTOR,
MULTISET,
MAP,
ROW,
Expand Down Expand Up @@ -544,6 +549,8 @@ private DataType parseTypeByKeyword() {
return new VariantType();
case BLOB:
return new BlobType();
case VECTOR:
return parseVectorType();
default:
throw parsingError("Unsupported type: " + token().value);
}
Expand Down Expand Up @@ -665,5 +672,16 @@ private int parseOptionalPrecision(int defaultPrecision) {
}
return precision;
}

/**
 * Parses the {@code <elementType, length>} parameter list of a vector type and builds the
 * type from it.
 *
 * @return the result of {@code DataTypes.VECTOR(length, elementType)}
 */
private DataType parseVectorType() {
// VECTOR<elementType, length>
nextToken(TokenType.BEGIN_SUBTYPE);
// The element type is parsed through parseTypeWithNullability().
DataType elementType = parseTypeWithNullability();
nextToken(TokenType.LIST_SEPARATOR);
// The length must be an integer literal; it is read from the token just consumed.
nextToken(TokenType.LITERAL_INT);
int length = tokenAsInt();
nextToken(TokenType.END_SUBTYPE);
return DataTypes.VECTOR(length, elementType);
}
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -106,6 +106,8 @@ public enum DataTypeRoot {

ARRAY(DataTypeFamily.CONSTRUCTED, DataTypeFamily.COLLECTION),

VECTOR(DataTypeFamily.CONSTRUCTED, DataTypeFamily.COLLECTION),

MULTISET(DataTypeFamily.CONSTRUCTED, DataTypeFamily.COLLECTION),

MAP(DataTypeFamily.CONSTRUCTED, DataTypeFamily.EXTENSION),
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -68,6 +68,8 @@ public interface DataTypeVisitor<R> {

R visit(ArrayType arrayType);

R visit(VectorType vectorType);

R visit(MultisetType multisetType);

R visit(MapType mapType);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,10 @@ public static ArrayType ARRAY(DataType element) {
return new ArrayType(element);
}

/**
 * Creates a {@link VectorType} with the given length and element type.
 *
 * @param length the length passed to the {@link VectorType} constructor
 * @param element the element type of the vector
 * @return a new {@link VectorType}
 */
public static VectorType VECTOR(int length, DataType element) {
return new VectorType(length, element);
}

public static CharType CHAR(int length) {
return new CharType(length);
}
Expand Down Expand Up @@ -221,6 +225,11 @@ public OptionalInt visit(VarBinaryType varBinaryType) {
return OptionalInt.of(varBinaryType.getLength());
}

/** Reports the declared length of the vector type. */
@Override
public OptionalInt visit(VectorType vectorType) {
    int length = vectorType.getLength();
    return OptionalInt.of(length);
}

@Override
protected OptionalInt defaultMethod(DataType dataType) {
return OptionalInt.empty();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,14 @@ public DataType visit(ArrayType arrayType) {
return new ArrayType(arrayType.isNullable(), arrayType.getElementType().accept(this));
}

/**
 * Rebuilds the vector type, keeping its nullability and length and replacing the element
 * type with the result of applying this visitor to it.
 */
@Override
public DataType visit(VectorType vectorType) {
    boolean nullable = vectorType.isNullable();
    int length = vectorType.getLength();
    DataType convertedElement = vectorType.getElementType().accept(this);
    return new VectorType(nullable, length, convertedElement);
}

@Override
public DataType visit(MultisetType multisetType) {
return new MultisetType(
Expand Down
Loading
Loading