Feature Admission and Feature Filtering
Provides admission strategies and filtering strategies for HashTable IDs.
Feature Admission
- class recis.nn.hashtable_hook.AdmitHook(name: str, params: dict | None = None)[source]
Feature admission hook for controlling HashTable feature acceptance.
AdmitHook implements feature admission policies that control whether new features (IDs) are allowed to be added to HashTable embeddings. This is useful for implementing read-only modes, feature freezing, or custom admission criteria.
The most common use case is the “ReadOnly” policy, which prevents new features from being added to the embedding table and returns zero embeddings for unknown IDs instead of creating new entries.
Example
Read-only HashTable usage:
from recis.nn import HashTable
from recis.nn.hashtable_hook import AdmitHook

# Create HashTable
ht = HashTable(embedding_shape=[64])

# Create read-only admission hook
ro_hook = AdmitHook("ReadOnly")

# Lookup with admission control
# Known IDs return their embeddings, unknown IDs return zeros
embeddings = ht(ids, admit_hook=ro_hook)

Integration with DynamicEmbedding:
from recis.nn import DynamicEmbedding, EmbeddingOption
from recis.nn.hashtable_hook import AdmitHook

# Configure embedding with admission hook
emb_opt = EmbeddingOption(
    embedding_dim=64,
    shared_name="user_embedding",
    combiner="sum",
    admit_hook=AdmitHook("ReadOnly"),
)

# Create embedding with read-only policy
embedding = DynamicEmbedding(emb_opt)

# Use in inference mode (no new embeddings created)
ids = torch.LongTensor([1, 2, 3, 4])
emb_output = embedding(ids)

Multi-embedding setup with selective admission:
from recis.nn import EmbeddingEngine, EmbeddingOption
from recis.nn.hashtable_hook import AdmitHook

# Configure different admission policies
user_emb_opt = EmbeddingOption(
    embedding_dim=64,
    shared_name="user_emb",
    admit_hook=AdmitHook("ReadOnly"),  # Read-only for users
)
item_emb_opt = EmbeddingOption(
    embedding_dim=64,
    shared_name="item_emb",
    # No admission hook = normal mode (new items allowed)
)

# Create embedding engine
embedding_engine = EmbeddingEngine(
    {"user_emb": user_emb_opt, "item_emb": item_emb_opt}
)

# Mixed mode: user embeddings read-only, item embeddings normal
samples = {"user_emb": user_ids, "item_emb": item_ids}
outputs = embedding_engine(samples)
Feature Filtering
- class recis.nn.hashtable_hook.FilterHook(name: str, params: dict | None = None)[source]
Feature filtering hook for implementing HashTable cleanup strategies.
FilterHook implements feature filtering policies that automatically remove unused or outdated features from HashTable embeddings. This helps manage memory usage and maintain embedding table quality by removing features that are no longer relevant.
The most common policy is “GlobalStepFilter”, which removes features that haven’t been accessed for a specified number of training steps. This is particularly useful in online learning scenarios where feature relevance changes over time.
Example
Basic filtering with step-based cleanup:
from recis.nn import EmbeddingEngine, EmbeddingOption
from recis.nn.hashtable_hook import FilterHook
from recis.hooks.filter_hook import HashTableFilterHook

# Configure embedding with filtering policy
user_emb_opt = EmbeddingOption(
    embedding_dim=64,
    shared_name="user_emb",
    combiner="sum",
    # Remove IDs not seen for 20 steps
    filter_hook=FilterHook("GlobalStepFilter", {"filter_step": 20}),
)

# Create embedding engine
embedding_engine = EmbeddingEngine({"user_emb": user_emb_opt})

# Setup filtering hook for periodic cleanup
filter_hook = HashTableFilterHook(filter_interval=10)  # Check every 10 steps

# Training loop with automatic filtering
for step in range(100):
    outputs = embedding_engine(samples)
    # Trigger filtering check
    filter_hook.after_step(None, step)
    if step % 10 == 0:
        print(f"Step {step}: Automatic cleanup performed")

Advanced filtering configuration:
# Multiple embeddings with different filtering policies
user_emb_opt = EmbeddingOption(
    embedding_dim=64,
    shared_name="user_emb",
    # Aggressive filtering for user features
    filter_hook=FilterHook("GlobalStepFilter", {"filter_step": 5}),
)
item_emb_opt = EmbeddingOption(
    embedding_dim=64,
    shared_name="item_emb",
    # Conservative filtering for item features
    filter_hook=FilterHook("GlobalStepFilter", {"filter_step": 50}),
)
category_emb_opt = EmbeddingOption(
    embedding_dim=32,
    shared_name="category_emb",
    # No filtering for stable category features
)

# Create engine with mixed filtering policies
embedding_engine = EmbeddingEngine(
    {
        "user_emb": user_emb_opt,
        "item_emb": item_emb_opt,
        "category_emb": category_emb_opt,
    }
)
Usage Examples
Admission: Rejecting New Features (Read-only Mode)
HashTable
from recis.nn import HashTable
from recis.nn.hashtable_hook import AdmitHook

# Build the hash table (embedding_shape=[8]).
table = HashTable(embedding_shape=[8])

# "ReadOnly" admission policy: lookups do not add new IDs to the table.
read_only_hook = AdmitHook("ReadOnly")

# Read-only lookup: IDs that are not in the table return zero embeddings.
# `ids` is assumed to be an ID tensor prepared by the caller.
emb_r = table(ids, admit_hook=read_only_hook)
DynamicEmbedding
import torch

from recis.nn import DynamicEmbedding, EmbeddingOption
from recis.nn.hashtable_hook import AdmitHook
from recis.nn.initializers import TruncNormalInitializer

# Configure embedding options; the "ReadOnly" admit hook prevents new IDs
# from being added to the embedding table.
emb_opt = EmbeddingOption(
    embedding_dim=64,
    shared_name="user_embedding",
    combiner="sum",
    initializer=TruncNormalInitializer(std=0.01),
    admit_hook=AdmitHook("ReadOnly"),
)

# Create dynamic embedding
embedding = DynamicEmbedding(emb_opt)

# Lookup in read-only mode: non-existent IDs directly return zero embeddings
ids = torch.LongTensor([1, 2, 3, 4])
emb_output = embedding(ids)
EmbeddingEngine
from recis.nn import EmbeddingEngine, EmbeddingOption
from recis.nn.hashtable_hook import AdmitHook

# Configure multiple embeddings.
# user_emb is read-only: unknown user IDs return zero embeddings.
user_emb_opt = EmbeddingOption(
    embedding_dim=64,
    shared_name="user_emb",
    combiner="sum",
    admit_hook=AdmitHook("ReadOnly"),
)
# item_emb has no admission hook, so it runs in normal mode.
item_emb_opt = EmbeddingOption(
    embedding_dim=64,
    shared_name="item_emb",
    combiner="sum",
)

# Create embedding engine
embedding_engine = EmbeddingEngine(
    {"user_emb": user_emb_opt, "item_emb": item_emb_opt}
)

# Forward propagation.
# `user_ids` / `item_ids` are assumed to be ID tensors prepared by the caller.
samples = {
    "user_emb": user_ids,
    "item_emb": item_ids,
}
# user_emb looks up embedding in read-only mode, item_emb looks up embedding in normal mode
outputs = embedding_engine(samples)
Filtering: Remove IDs That Have Not Appeared for a Fixed Number of Steps
EmbeddingEngine
from recis.hooks.filter_hook import HashTableFilterHook
from recis.nn import EmbeddingEngine, EmbeddingOption
from recis.nn.hashtable_hook import FilterHook

# Configure multiple embeddings
user_emb_opt = EmbeddingOption(
    embedding_dim=64,
    shared_name="user_emb",
    combiner="sum",
    # Add filtering strategy for user_emb: filter out IDs that don't appear for 10 steps
    filter_hook=FilterHook("GlobalStepFilter", {"filter_step": 10}),
)
# item_emb has no filter hook configured.
item_emb_opt = EmbeddingOption(
    embedding_dim=64,
    shared_name="item_emb",
    combiner="sum",
)

# Create embedding engine
embedding_engine = EmbeddingEngine(
    {"user_emb": user_emb_opt, "item_emb": item_emb_opt}
)

# Forward propagation.
# `user_ids` / `item_ids` are assumed to be ID tensors prepared by the caller.
samples = {
    "user_emb": user_ids,
    "item_emb": item_ids,
}

# user_emb is configured with the step-based filter; item_emb has no filter.
# Check for filterable IDs every 2 steps
hook = HashTableFilterHook(2)
for i in range(100):
    outputs = embedding_engine(samples)
    hook.after_step(None, i)