File Index#
Interface#
-
class FileIndexer#
File index interface. To read and write a file index.
Public Functions
-
virtual ~FileIndexer() = default#
Create
FileIndexReader with input stream.
- Parameters:
arrow_schema – ArrowSchema derived from arrow schema or struct type with specified indexed field.
start – Start position of input stream.
length – Length of index bytes.
input_stream – Input stream from which the index is read.
pool – Memory pool for memory allocation.
- Returns:
A
FileIndexReader to read index.
Create
FileIndexWriter for arrow schema.
- Parameters:
arrow_schema – ArrowSchema derived from arrow schema or struct type with specified indexed field.
pool – Memory pool for memory allocation.
- Returns:
A
FileIndexWriter to write index.
-
virtual ~FileIndexer() = default#
-
class FileIndexerFactory : public paimon::Factory#
File index factory to construct
FileIndexer.
Public Functions
-
~FileIndexerFactory() override#
-
virtual Result<std::unique_ptr<FileIndexer>> Create(const std::map<std::string, std::string> &options) const = 0#
Create a
FileIndexer with specified options.
Public Static Functions
-
static Result<std::unique_ptr<FileIndexer>> Get(const std::string &identifier, const std::map<std::string, std::string> &options)#
Get a
FileIndexer according to identifier and options.
-
~FileIndexerFactory() override#
-
class FileIndexWriter#
Interface for writing file-level index data from Arrow batches.
Public Functions
-
virtual ~FileIndexWriter() = default#
-
virtual Status AddBatch(::ArrowArray *batch) = 0#
Adds a batch of data to the index writer.
- Parameters:
batch – Pointer to a C ArrowArray derived from an Arrow struct array containing the specified indexed field.
- Returns:
Status::OK() on success; otherwise, an error indicating failure (e.g., schema mismatch).
-
virtual Result<PAIMON_UNIQUE_PTR<Bytes>> SerializedBytes() const = 0#
Serializes the built index into a byte buffer.
Note
This method returns the complete serialized form of the index after all batches have been added. It can be called only once and typically assumes no further calls to
AddBatch() will occur afterward.
- Returns:
A unique pointer to a byte array containing the serialized index data, or an error if serialization fails.
-
virtual ~FileIndexWriter() = default#
-
class FileIndexReader : public paimon::FunctionVisitor<std::shared_ptr<FileIndexResult>>#
Evaluates filter predicates against a file-level index to determine file eligibility.
FileIndexReader implements the FunctionVisitor interface specialized to produce std::shared_ptr<FileIndexResult> objects. It reads pre-built file-level index data (e.g., bitmap, bsi or bloom filters) from an index file and evaluates whether a given data file may contain rows matching a specific predicate.
Public Functions
-
virtual Result<std::shared_ptr<FileIndexResult>> VisitIsNotNull() override#
Evaluates the IS NOT NULL predicate on the indexed column.
-
virtual Result<std::shared_ptr<FileIndexResult>> VisitIsNull() override#
Evaluates the IS NULL predicate on the indexed column.
-
virtual Result<std::shared_ptr<FileIndexResult>> VisitEqual(const Literal &literal) override#
Evaluates the equality (==) predicate against the given literal.
-
virtual Result<std::shared_ptr<FileIndexResult>> VisitNotEqual(const Literal &literal) override#
Evaluates the inequality (!=) predicate against the given literal.
-
virtual Result<std::shared_ptr<FileIndexResult>> VisitLessThan(const Literal &literal) override#
Evaluates the less-than (<) predicate against the given literal.
-
virtual Result<std::shared_ptr<FileIndexResult>> VisitLessOrEqual(const Literal &literal) override#
Evaluates the less-than-or-equal (<=) predicate against the given literal.
-
virtual Result<std::shared_ptr<FileIndexResult>> VisitGreaterThan(const Literal &literal) override#
Evaluates the greater-than (>) predicate against the given literal.
-
virtual Result<std::shared_ptr<FileIndexResult>> VisitGreaterOrEqual(const Literal &literal) override#
Evaluates the greater-than-or-equal (>=) predicate against the given literal.
-
virtual Result<std::shared_ptr<FileIndexResult>> VisitIn(const std::vector<Literal> &literals) override#
Evaluates the IN predicate against a list of literals.
-
virtual Result<std::shared_ptr<FileIndexResult>> VisitNotIn(const std::vector<Literal> &literals) override#
Evaluates the NOT IN predicate against a list of literals.
-
virtual Result<std::shared_ptr<FileIndexResult>> VisitStartsWith(const Literal &prefix) override#
Evaluates whether string values start with the given prefix.
-
virtual Result<std::shared_ptr<FileIndexResult>> VisitEndsWith(const Literal &suffix) override#
Evaluates whether string values end with the given suffix.
-
virtual Result<std::shared_ptr<FileIndexResult>> VisitContains(const Literal &literal) override#
Evaluates whether string values contain the given substring.
-
virtual Result<std::shared_ptr<FileIndexResult>> VisitIsNotNull() override#
-
class FileIndexFormat#
Defines the on-disk format and versioning for Paimon file-level indexes.
File index file format. Put all column and offset in the header.
Public Static Functions
Creates a
Reader to parse an index file (may contain multiple indexes) from the given input stream.
- Parameters:
input_stream – Input stream containing serialized index data.
pool – Memory pool for temporary allocations during reading.
- Returns:
A unique pointer to a
Reader on success, or an error if the stream is invalid (e.g., wrong magic, unsupported version, or corrupted data).
Public Static Attributes
-
static const int64_t MAGIC#
-
static const int32_t EMPTY_INDEX_FLAG#
-
static const int32_t V_1#
-
class Reader#
Reader for file index file.
Public Functions
-
virtual ~Reader() = default#
-
virtual Result<std::vector<std::shared_ptr<FileIndexReader>>> ReadColumnIndex(const std::string &column_name, ::ArrowSchema *arrow_schema) const = 0#
Reads index data for a specific column from the index file.
- Parameters:
column_name – Name of the column to retrieve index data for.
arrow_schema – Arrow schema that must contain a field corresponding to
column_name.
- Returns:
A vector of shared pointers to FileIndexReader objects, each corresponding to a different index type; or an error if the column is not indexed or the index is malformed.
-
virtual ~Reader() = default#
-
class FileIndexResult : public std::enable_shared_from_this<FileIndexResult>#
File index result used to decide whether to filter out a file.
Subclassed by paimon::BitmapIndexResult, paimon::Remain, paimon::Skip
Public Functions
-
virtual ~FileIndexResult() = default#
-
virtual Result<bool> IsRemain() const = 0#
- Returns:
Whether the file should be retained (i.e., not skipped).
Compute the intersection of the current result with the provided result.
Compute the union of the current result with the provided result.
-
virtual std::string ToString() const = 0#
Public Static Functions
-
static std::shared_ptr<FileIndexResult> Remain()#
Note
This is a singleton-like utility; all calls return equivalent objects.
- Returns:
A shared instance representing “retain the file”.
-
static std::shared_ptr<FileIndexResult> Skip()#
Note
This is a singleton-like utility; all calls return equivalent objects.
- Returns:
A shared instance representing “skip the file”.
-
virtual ~FileIndexResult() = default#
-
class BitmapIndexResult : public paimon::FileIndexResult#
The implementation of bitmap file index result, represents row granularity.
Note
The inner bitmap in BitmapIndexResult is lazily initialized only when the result is about to be used.
Public Types
-
using BitmapSupplier = std::function<Result<RoaringBitmap32>()>#
Public Functions
-
explicit BitmapIndexResult(BitmapSupplier bitmap_supplier)#
-
~BitmapIndexResult() override#
-
virtual Result<bool> IsRemain() const override#
- Returns:
Whether the file should be retained (i.e., not skipped).
Compute the intersection of the current result with the provided result.
Compute the union of the current result with the provided result.
-
Result<const RoaringBitmap32*> GetBitmap() const#
- Returns:
Inner
RoaringBitmap32.
-
virtual std::string ToString() const override#
-
using BitmapSupplier = std::function<Result<RoaringBitmap32>()>#