Compare commits

...

5 Commits

SHA1 Message Date
01131f252e Rank search results by match relevance and recency, update deduplication, and bump version to v0.9.1. 2025-10-01 17:52:20 +01:00
02333b74ae completed fulltext index migration 2025-10-01 17:24:50 +01:00
86ac7b7897 Add full-text search indexing for word tokens and update tokenization logic
- Introduced word index (`WordPrefix`) for tokenized search terms.
- Added word token extraction in event and filter processing.
- Implemented Unicode-aware, case-insensitive tokenizer with URL, mention, and hex filters.
- Extended full-text indexing to include tags and content.
2025-10-01 15:03:41 +01:00
7e6adf9fba Adjust BadgerDB memory configurations to optimize resource usage and bump version to v0.8.9. 2025-10-01 12:52:45 +01:00
7d5ebd5ccd Adjust BadgerDB memory settings to prevent OOM issues and update version to v0.8.8. 2025-10-01 12:40:34 +01:00
16 changed files with 1136 additions and 193 deletions

View File

@@ -96,4 +96,4 @@ log statements to help locate the cause of bugs
always use Go v1.25.1 for everything involving Go
always use the nips repository that is available at /nips in the root of the repository for documentation about nostr protocol
always use the nips repository also for information; it is found at ../github.com/nostr-protocol/nips, attached to the project

View File

@@ -46,6 +46,7 @@ func (s *Server) HandleRelayInfo(w http.ResponseWriter, r *http.Request) {
relayinfo.ExpirationTimestamp,
relayinfo.ProtectedEvents,
relayinfo.RelayListMetadata,
relayinfo.SearchCapability,
)
if s.Config.ACLMode != "none" {
supportedNIPs = relayinfo.GetList(
@@ -62,6 +63,7 @@ func (s *Server) HandleRelayInfo(w http.ResponseWriter, r *http.Request) {
relayinfo.ExpirationTimestamp,
relayinfo.ProtectedEvents,
relayinfo.RelayListMetadata,
relayinfo.SearchCapability,
)
}
sort.Sort(supportedNIPs)
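
The two hunks above register relayinfo.SearchCapability in both NIP-11 listings, which advertises NIP-50 (search) in the relay's supported_nips so clients know they may include a search field in a filter. As a sketch of the resulting exchange (the subscription id "sub1" is hypothetical):

["REQ", "sub1", {"search": "alpha beta", "limit": 50}]
["EVENT", "sub1", {...}]   (zero or more matches, ranked by the relay)
["EOSE", "sub1"]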

app/web/dist/index-4xsq3yxw.js vendored Normal file

File diff suppressed because one or more lines are too long

View File

@@ -5,7 +5,7 @@
<meta name="viewport" content="width=device-width, initial-scale=1.0" />
<title>Nostr Relay</title>
<link rel="stylesheet" crossorigin href="./index-q4cwd1fy.css"><script type="module" crossorigin src="./index-w8zpqk4w.js"></script></head>
<link rel="stylesheet" crossorigin href="./index-q4cwd1fy.css"><script type="module" crossorigin src="./index-4xsq3yxw.js"></script></head>
<body>
<script>
// Apply system theme preference immediately to avoid flash of wrong theme

View File

@@ -25,6 +25,14 @@ function App() {
const [allEventsHasMore, setAllEventsHasMore] = useState(true);
const [expandedAllEventId, setExpandedAllEventId] = useState(null);
// Search state
const [searchQuery, setSearchQuery] = useState('');
const [searchResults, setSearchResults] = useState([]);
const [searchLoading, setSearchLoading] = useState(false);
const [searchOffset, setSearchOffset] = useState(0);
const [searchHasMore, setSearchHasMore] = useState(true);
const [expandedSearchEventId, setExpandedSearchEventId] = useState(null);
// Profile cache for All Events Log
const [profileCache, setProfileCache] = useState({});
@@ -68,6 +76,7 @@ function App() {
exportAll: false,
exportSpecific: false,
importEvents: false,
search: true,
eventsLog: false,
allEventsLog: false
});
@@ -992,6 +1001,177 @@ function App() {
}
}
// Search functions
function processSearchResponse(receivedEvents, reset) {
try {
const filtered = filterDeletedEvents(receivedEvents);
const sorted = filtered.sort((a, b) => b.created_at - a.created_at);
const currentOffset = reset ? 0 : searchOffset;
const limit = 50;
const page = sorted.slice(currentOffset, currentOffset + limit);
if (reset) {
setSearchResults(page);
setSearchOffset(page.length);
} else {
setSearchResults(prev => [...prev, ...page]);
setSearchOffset(prev => prev + page.length);
}
setSearchHasMore(currentOffset + page.length < sorted.length);
// fetch profiles for authors in search results
fetchProfilesForEvents(page);
} catch (e) {
console.error('Error processing search results:', e);
} finally {
setSearchLoading(false);
}
}
async function fetchSearchResultsFromRelay(query, reset = true, limit = 50, timeoutMs = 10000) {
if (!query || !query.trim()) {
// clear results on empty query when resetting
if (reset) {
setSearchResults([]);
setSearchOffset(0);
setSearchHasMore(true);
}
return;
}
if (searchLoading) return;
if (!reset && !searchHasMore) return;
setSearchLoading(true);
return new Promise((resolve) => {
let resolved = false;
let receivedEvents = [];
let ws;
let reqSent = false;
try {
ws = new WebSocket(relayURL());
} catch (e) {
console.error('Failed to create WebSocket:', e);
setSearchLoading(false);
resolve();
return;
}
const subId = 'search-' + Math.random().toString(36).slice(2);
const timer = setTimeout(() => {
if (ws && ws.readyState === 1) {
try { ws.close(); } catch (_) {}
}
if (!resolved) {
resolved = true;
processSearchResponse(receivedEvents, reset);
resolve();
}
}, timeoutMs);
const sendRequest = () => {
if (!reqSent && ws && ws.readyState === 1) {
try {
const req = ['REQ', subId, { search: query }];
ws.send(JSON.stringify(req));
reqSent = true;
} catch (e) {
console.error('Failed to send WebSocket request:', e);
}
}
};
ws.onopen = () => sendRequest();
ws.onmessage = async (msg) => {
try {
const data = JSON.parse(msg.data);
const type = data[0];
if (type === 'AUTH') {
const challenge = data[1];
if (!window.nostr) {
clearTimeout(timer);
if (!resolved) {
resolved = true;
processSearchResponse(receivedEvents, reset);
resolve();
}
return;
}
try {
const authEvent = { kind: 22242, created_at: Math.floor(Date.now()/1000), tags: [['relay', relayURL()], ['challenge', challenge]], content: '' };
const signed = await window.nostr.signEvent(authEvent);
ws.send(JSON.stringify(['AUTH', signed]));
} catch (authErr) {
console.error('Search auth failed:', authErr);
clearTimeout(timer);
if (!resolved) {
resolved = true;
processSearchResponse(receivedEvents, reset);
resolve();
}
}
} else if (type === 'EVENT' && data[1] === subId) {
const ev = data[2];
if (ev) {
receivedEvents.push({
id: ev.id,
kind: ev.kind,
created_at: ev.created_at,
content: ev.content || '',
author: ev.pubkey || '',
raw_json: JSON.stringify(ev)
});
}
} else if (type === 'EOSE' && data[1] === subId) {
try { ws.send(JSON.stringify(['CLOSE', subId])); } catch (_) {}
try { ws.close(); } catch (_) {}
clearTimeout(timer);
if (!resolved) {
resolved = true;
processSearchResponse(receivedEvents, reset);
resolve();
}
} else if (type === 'CLOSED' && data[1] === subId) {
clearTimeout(timer);
if (!resolved) {
resolved = true;
processSearchResponse(receivedEvents, reset);
resolve();
}
} else if (type === 'OK' && data[1] && data[1].length === 64 && !reqSent) {
sendRequest();
}
} catch (e) {
console.error('Search WS message parse error:', e);
}
};
ws.onerror = (err) => {
console.error('Search WS error:', err);
try { ws.close(); } catch (_) {}
clearTimeout(timer);
if (!resolved) {
resolved = true;
processSearchResponse(receivedEvents, reset);
resolve();
}
};
ws.onclose = () => {
clearTimeout(timer);
if (!resolved) {
resolved = true;
processSearchResponse(receivedEvents, reset);
resolve();
}
};
});
}
function toggleSearchEventExpansion(eventId) {
setExpandedSearchEventId(current => current === eventId ? null : eventId);
}
// Events log functions
async function fetchEvents(reset = false) {
await fetchEventsFromRelay(reset);
@@ -1617,6 +1797,140 @@ function App() {
</div>
</>
)}
{/* Search */}
<div className={`m-2 p-2 ${getPanelBgClass()} rounded-lg w-full`}>
<div
className={`text-lg font-bold flex items-center justify-between cursor-pointer p-2 ${getTextClass()} ${getThemeClasses('hover:bg-gray-300', 'hover:bg-gray-700')} rounded`}
onClick={() => toggleSection('search')}
>
<span>Search</span>
<span className="text-xl">
{expandedSections.search ? '▼' : '▶'}
</span>
</div>
{expandedSections.search && (
<div className="p-2 bg-gray-900 rounded-lg mt-2">
<div className="flex gap-2 items-center mb-3">
<input
type="text"
placeholder="Search notes..."
value={searchQuery}
onChange={(e) => setSearchQuery(e.target.value)}
onKeyDown={(e) => { if (e.key === 'Enter') { fetchSearchResultsFromRelay(searchQuery, true); } }}
className={`${getThemeClasses('bg-white text-black border-gray-300', 'bg-gray-800 text-white border-gray-600')} border rounded px-3 py-2 flex-grow`}
/>
<button
className={`${getThemeClasses('bg-blue-600 hover:bg-blue-700', 'bg-blue-500 hover:bg-blue-600')} text-white px-4 py-2 rounded`}
onClick={() => fetchSearchResultsFromRelay(searchQuery, true)}
disabled={searchLoading}
title="Search"
>
{searchLoading ? 'Searching…' : 'Search'}
</button>
</div>
<div className="space-y-2">
{searchResults.length === 0 && !searchLoading && (
<div className={`text-center py-4 ${getTextClass()}`}>No results</div>
)}
{searchResults.map((event) => (
<div key={event.id} className={`border rounded p-3 ${getThemeClasses('border-gray-300 bg-white', 'border-gray-600 bg-gray-800')}`}>
<div className="cursor-pointer" onClick={() => toggleSearchEventExpansion(event.id)}>
<div className="flex items-center justify-between w-full">
<div className="flex items-center gap-6 w-full">
<div className="flex items-center gap-3 min-w-0">
{event.author && profileCache[event.author] && (
<>
{profileCache[event.author].picture && (
<img
src={profileCache[event.author].picture}
alt={profileCache[event.author].display_name || profileCache[event.author].name || 'User avatar'}
className={`w-8 h-8 rounded-full object-cover border ${getThemeClasses('border-gray-300', 'border-gray-600')}`}
onError={(e) => { e.currentTarget.style.display = 'none'; }}
/>
)}
<div className="flex flex-col flex-grow w-full">
<span className={`text-sm font-medium ${getTextClass()}`}>
{profileCache[event.author].display_name || profileCache[event.author].name || `${event.author.slice(0, 8)}...`}
</span>
{profileCache[event.author].display_name && profileCache[event.author].name && (
<span className={`text-xs ${getTextClass()} opacity-70`}>
{profileCache[event.author].name}
</span>
)}
</div>
</>
)}
{event.author && !profileCache[event.author] && (
<span className={`text-sm font-medium ${getTextClass()}`}>
{`${event.author.slice(0, 8)}...`}
</span>
)}
</div>
<div className="flex items-center gap-3">
<span className={`font-mono text-sm px-2 py-1 rounded ${getThemeClasses('bg-blue-100 text-blue-800', 'bg-blue-900 text-blue-200')}`}>
Kind {event.kind}
</span>
<span className={`text-sm ${getTextClass()}`}>
{formatTimestamp(event.created_at)}
</span>
</div>
</div>
<div className="justify-end ml-auto rounded-full h-16 w-16 flex items-center justify-center">
<div className={`text-white text-xs px-4 py-4 rounded flex flex-grow items-center ${getThemeClasses('text-gray-700', 'text-gray-300')}`}>
{expandedSearchEventId === event.id ? '▼' : ' '}
</div>
<button
className="bg-red-600 hover:bg-red-700 text-white text-xs px-1 py-1 rounded flex items-center"
onClick={(e) => { e.stopPropagation(); deleteEvent(event.id, event.raw_json, event.author); }}
title="Delete this event"
>
🗑
</button>
</div>
</div>
{event.content && (
<div className={`mt-2 text-sm ${getTextClass()}`}>
{truncateContent(event.content)}
</div>
)}
</div>
{expandedSearchEventId === event.id && (
<div className={`mt-3 p-3 rounded ${getThemeClasses('bg-gray-100', 'bg-gray-900')}`} onClick={(e) => e.stopPropagation()}>
<div className="flex items-center justify-between mb-2">
<span className={`text-sm font-semibold ${getTextClass()}`}>Raw JSON</span>
<button
className={`${getThemeClasses('bg-gray-200 hover:bg-gray-300 text-black', 'bg-gray-800 hover:bg-gray-700 text-white')} text-xs px-2 py-1 rounded`}
onClick={() => copyEventJSON(event.raw_json)}
>
Copy JSON
</button>
</div>
<pre className={`text-xs overflow-auto max-h-64 ${getThemeClasses('bg-white text-black', 'bg-gray-950 text-gray-200')} p-2 rounded`}>{event.raw_json}</pre>
</div>
)}
</div>
))}
{!searchLoading && searchHasMore && searchResults.length > 0 && (
<div className="text-center py-4">
<button
className={`${getThemeClasses('bg-blue-600 hover:bg-blue-700', 'bg-blue-500 hover:bg-blue-600')} text-white px-4 py-2 rounded`}
onClick={() => fetchSearchResultsFromRelay(searchQuery, false)}
>
Load More
</button>
</div>
)}
</div>
</div>
)}
</div>
{/* My Events Log */}
<div className={`m-2 p-2 ${getPanelBgClass()} rounded-lg w-full`}>
<div

View File

@@ -52,8 +52,18 @@ func New(
}
opts := badger.DefaultOptions(d.dataDir)
opts.BlockCacheSize = int64(units.Gb)
opts.BlockSize = units.Gb
// Use sane defaults to avoid excessive memory usage during startup.
// Badger's default BlockSize is small (e.g., 4KB). Overriding it to very large values
// can cause massive allocations and OOM panics during deployments.
// Set BlockCacheSize to a moderate value and keep BlockSize small.
opts.BlockCacheSize = int64(256 * units.Mb) // 256 MB cache
opts.BlockSize = 4 * units.Kb // 4 KB block size
// Prevent huge allocations during table building and memtable flush.
// Badger's TableBuilder buffer is sized by BaseTableSize; ensure it's small.
opts.BaseTableSize = 64 * units.Mb // 64 MB per table (default ~2MB, increased for fewer files but safe)
opts.MemTableSize = 64 * units.Mb // 64 MB memtable to match table size
// Keep value log files to a moderate size as well
opts.ValueLogFileSize = 256 * units.Mb // 256 MB value log files
opts.CompactL0OnClose = true
opts.LmaxCompaction = true
opts.Compression = options.None
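
A rough budget check of these settings, as a standalone sketch; it assumes badger v4's default NumMemtables of 5 and ignores compaction buffers and index overhead, so the figure is a floor, not a guarantee:

package main

import "fmt"

const (
	mb           = 1 << 20
	blockCache   = 256 * mb // opts.BlockCacheSize above
	memTable     = 64 * mb  // opts.MemTableSize above
	numMemtables = 5        // badger v4 default, assumed unchanged
)

func main() {
	// steady-state resident estimate: 256 MB cache + 5 x 64 MB memtables
	fmt.Printf("~%d MB\n", (blockCache+numMemtables*memTable)/mb) // ~576 MB
}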

View File

@@ -153,5 +153,35 @@ func GetIndexesForEvent(ev *event.E, serial uint64) (
if err = appendIndexBytes(&idxs, kindPubkeyIndex); chk.E(err) {
return
}
// Word token indexes (from content)
if len(ev.Content) > 0 {
for _, h := range TokenHashes(ev.Content) {
w := new(Word)
w.FromWord(h) // 8-byte truncated hash
wIdx := indexes.WordEnc(w, ser)
if err = appendIndexBytes(&idxs, wIdx); chk.E(err) {
return
}
}
}
// Extend full-text search to include all fields of all tags
if ev.Tags != nil && ev.Tags.Len() > 0 {
for _, t := range *ev.Tags {
for _, field := range t.T { // include key and all values
if len(field) == 0 {
continue
}
for _, h := range TokenHashes(field) {
w := new(Word)
w.FromWord(h)
wIdx := indexes.WordEnc(w, ser)
if err = appendIndexBytes(&idxs, wIdx); chk.E(err) {
return
}
}
}
}
}
return
}
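
Each of these keys follows the layout documented for WordPrefix further down (3-byte prefix | 8-byte truncated word hash | 5-byte serial). A minimal sketch of the composition using raw byte slices, standing in for the project's Word and Uint40 types and assuming the prefix is the literal bytes "wrd" with big-endian serials:

package main

import (
	"crypto/sha256"
	"encoding/binary"
	"fmt"
)

// wordKey sketches a "wrd" index entry: prefix, truncated token hash, serial.
func wordKey(token string, serial uint64) []byte {
	h := sha256.Sum256([]byte(token)) // token is assumed already lowercased
	key := append([]byte("wrd"), h[:8]...)
	var ser [8]byte
	binary.BigEndian.PutUint64(ser[:], serial)
	return append(key, ser[3:]...) // keep the low 5 bytes
}

func main() {
	fmt.Printf("%x\n", wordKey("alpha", 42))
}

The filter side below mirrors this layout: scanning from prefix|hash to prefix|hash padded with five 0xff bytes covers every possible serial for that word.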

View File

@@ -113,6 +113,27 @@ func GetIndexesFromFilter(f *filter.F) (idxs []Range, err error) {
return
}
// Word search: if Search field is present, generate word index ranges
if len(f.Search) > 0 {
for _, h := range TokenHashes(f.Search) {
w := new(types2.Word)
w.FromWord(h)
buf := new(bytes.Buffer)
idx := indexes.WordEnc(w, nil)
if err = idx.MarshalWrite(buf); chk.E(err) {
return
}
b := buf.Bytes()
end := make([]byte, len(b))
copy(end, b)
for i := 0; i < 5; i++ { // match any serial
end = append(end, 0xff)
}
idxs = append(idxs, Range{b, end})
}
return
}
caStart := new(types2.Uint64)
caEnd := new(types2.Uint64)

View File

@@ -69,6 +69,7 @@ const (
TagPubkeyPrefix = I("tpc") // tag, pubkey, created at
TagKindPubkeyPrefix = I("tkp") // tag, kind, pubkey, created at
WordPrefix = I("wrd") // word hash, serial
ExpirationPrefix = I("exp") // timestamp of expiration
VersionPrefix = I("ver") // database version number, for triggering reindexes when new keys are added (policy is add-only).
)
@@ -106,6 +107,8 @@ func Prefix(prf int) (i I) {
return ExpirationPrefix
case Version:
return VersionPrefix
case Word:
return WordPrefix
}
return
}
@@ -147,6 +150,8 @@ func Identify(r io.Reader) (i int, err error) {
case ExpirationPrefix:
i = Expiration
case WordPrefix:
i = Word
}
return
}
@@ -233,6 +238,21 @@ func FullIdPubkeyDec(
return New(NewPrefix(), ser, fid, p, ca)
}
// Word index for tokenized search terms
//
// 3 prefix|8 word-hash|5 serial
var Word = next()
func WordVars() (w *types.Word, ser *types.Uint40) {
return new(types.Word), new(types.Uint40)
}
func WordEnc(w *types.Word, ser *types.Uint40) (enc *T) {
return New(NewPrefix(Word), w, ser)
}
func WordDec(w *types.Word, ser *types.Uint40) (enc *T) {
return New(NewPrefix(), w, ser)
}
// CreatedAt is an index that allows search for the timestamp on the event.
//
// 3 prefix|8 timestamp|5 serial

View File

@@ -14,7 +14,7 @@ import (
)
const (
currentVersion uint32 = 1
currentVersion uint32 = 2
)
func (d *D) RunMigrations() {
@@ -56,22 +56,8 @@ func (d *D) RunMigrations() {
}
if dbVersion == 0 {
log.D.F("no version tag found, creating...")
// write the version tag now
if err = d.Update(
func(txn *badger.Txn) (err error) {
buf := new(bytes.Buffer)
vv := new(types.Uint32)
vv.Set(currentVersion)
log.I.S(vv)
if err = indexes.VersionEnc(vv).MarshalWrite(buf); chk.E(err) {
return
}
if err = txn.Set(buf.Bytes(), nil); chk.E(err) {
return
}
return
},
); chk.E(err) {
// write the version tag now (ensure any old tags are removed first)
if err = d.writeVersionTag(currentVersion); chk.E(err) {
return
}
}
@@ -79,7 +65,136 @@ func (d *D) RunMigrations() {
log.I.F("migrating to version 1...")
// the first migration is expiration tags
d.UpdateExpirationTags()
// bump to version 1
_ = d.writeVersionTag(1)
}
if dbVersion < 2 {
log.I.F("migrating to version 2...")
// backfill word indexes
d.UpdateWordIndexes()
// bump to version 2
_ = d.writeVersionTag(2)
}
}
// writeVersionTag writes a new version tag key to the database (no value)
func (d *D) writeVersionTag(ver uint32) (err error) {
return d.Update(
func(txn *badger.Txn) (err error) {
// delete any existing version keys first (there should only be one, but be safe)
verPrf := new(bytes.Buffer)
if _, err = indexes.VersionPrefix.Write(verPrf); chk.E(err) {
return
}
it := txn.NewIterator(badger.IteratorOptions{Prefix: verPrf.Bytes()})
defer it.Close()
for it.Rewind(); it.Valid(); it.Next() {
item := it.Item()
key := item.KeyCopy(nil)
if err = txn.Delete(key); chk.E(err) {
return
}
}
// now write the new version key
buf := new(bytes.Buffer)
vv := new(types.Uint32)
vv.Set(ver)
if err = indexes.VersionEnc(vv).MarshalWrite(buf); chk.E(err) {
return
}
return txn.Set(buf.Bytes(), nil)
},
)
}
func (d *D) UpdateWordIndexes() {
log.T.F("updating word indexes...")
var err error
var wordIndexes [][]byte
// iterate all events and generate word index keys from content and tags
if err = d.View(
func(txn *badger.Txn) (err error) {
prf := new(bytes.Buffer)
if err = indexes.EventEnc(nil).MarshalWrite(prf); chk.E(err) {
return
}
it := txn.NewIterator(badger.IteratorOptions{Prefix: prf.Bytes()})
defer it.Close()
for it.Rewind(); it.Valid(); it.Next() {
item := it.Item()
var val []byte
if val, err = item.ValueCopy(nil); chk.E(err) {
continue
}
// decode the event
ev := new(event.E)
if err = ev.UnmarshalBinary(bytes.NewBuffer(val)); chk.E(err) {
continue
}
// log.I.F("updating word indexes for event: %s", ev.Serialize())
// read serial from key
key := item.Key()
ser := indexes.EventVars()
if err = indexes.EventDec(ser).UnmarshalRead(bytes.NewBuffer(key)); chk.E(err) {
continue
}
// collect unique word hashes for this event
seen := make(map[string]struct{})
// from content
if len(ev.Content) > 0 {
for _, h := range TokenHashes(ev.Content) {
seen[string(h)] = struct{}{}
}
}
// from all tag fields (key and values)
if ev.Tags != nil && ev.Tags.Len() > 0 {
for _, t := range *ev.Tags {
for _, field := range t.T {
if len(field) == 0 {
continue
}
for _, h := range TokenHashes(field) {
seen[string(h)] = struct{}{}
}
}
}
}
// build keys
for k := range seen {
w := new(types.Word)
w.FromWord([]byte(k))
buf := new(bytes.Buffer)
if err = indexes.WordEnc(
w, ser,
).MarshalWrite(buf); chk.E(err) {
continue
}
wordIndexes = append(wordIndexes, buf.Bytes())
}
}
return
},
); chk.E(err) {
return
}
// sort the indexes for ordered writes
sort.Slice(
wordIndexes, func(i, j int) bool {
return bytes.Compare(
wordIndexes[i], wordIndexes[j],
) < 0
},
)
// write in a batch
batch := d.NewWriteBatch()
for _, v := range wordIndexes {
if err = batch.Set(v, nil); chk.E(err) {
continue
}
}
_ = batch.Flush()
log.T.F("finished updating word indexes...")
}
func (d *D) UpdateExpirationTags() {

View File

@@ -0,0 +1,194 @@
package database
import (
"context"
"os"
"testing"
"time"
"lol.mleku.dev/chk"
"next.orly.dev/pkg/crypto/p256k"
"next.orly.dev/pkg/encoders/event"
"next.orly.dev/pkg/encoders/filter"
"next.orly.dev/pkg/encoders/kind"
"next.orly.dev/pkg/encoders/tag"
"next.orly.dev/pkg/encoders/timestamp"
)
// helper to create a fresh DB
func newTestDB(t *testing.T) (*D, context.Context, context.CancelFunc, string) {
t.Helper()
tempDir, err := os.MkdirTemp("", "search-db-*")
if err != nil {
t.Fatalf("Failed to create temp dir: %v", err)
}
ctx, cancel := context.WithCancel(context.Background())
db, err := New(ctx, cancel, tempDir, "error")
if err != nil {
cancel()
os.RemoveAll(tempDir)
t.Fatalf("Failed to init DB: %v", err)
}
return db, ctx, cancel, tempDir
}
// TestQueryEventsBySearchTerms creates a small set of events with content and tags,
// saves them, then queries using filter.Search to ensure the word index works.
func TestQueryEventsBySearchTerms(t *testing.T) {
db, ctx, cancel, tempDir := newTestDB(t)
defer func() {
// cancel context first to stop background routines cleanly
cancel()
db.Close()
os.RemoveAll(tempDir)
}()
// signer for all events
sign := new(p256k.Signer)
if err := sign.Generate(); chk.E(err) {
t.Fatalf("signer generate: %v", err)
}
now := timestamp.Now().V
// Events to cover tokenizer rules:
// - regular words
// - URLs ignored
// - 64-char hex ignored
// - nostr: URIs ignored
// - #[n] mentions ignored
// - tag fields included in search
// 1. Contains words: "alpha beta", plus URL and hex (ignored)
ev1 := event.New()
ev1.Kind = kind.TextNote.K
ev1.Pubkey = sign.Pub()
ev1.CreatedAt = now - 5
ev1.Content = []byte("Alpha beta visit https://example.com deadbeefdeadbeefdeadbeefdeadbeefdeadbeefdeadbeefdeadbeefdeadbeef")
ev1.Tags = tag.NewS()
ev1.Sign(sign)
if _, _, err := db.SaveEvent(ctx, ev1); err != nil {
t.Fatalf("save ev1: %v", err)
}
// 2. Contains overlap word "beta" and unique "gamma" and nostr: URI ignored
ev2 := event.New()
ev2.Kind = kind.TextNote.K
ev2.Pubkey = sign.Pub()
ev2.CreatedAt = now - 4
ev2.Content = []byte("beta and GAMMA with nostr:nevent1qqqqq")
ev2.Tags = tag.NewS()
ev2.Sign(sign)
if _, _, err := db.SaveEvent(ctx, ev2); err != nil {
t.Fatalf("save ev2: %v", err)
}
// 3. Contains only a URL (should not create word tokens) and mention #[1] (ignored)
ev3 := event.New()
ev3.Kind = kind.TextNote.K
ev3.Pubkey = sign.Pub()
ev3.CreatedAt = now - 3
ev3.Content = []byte("see www.example.org #[1]")
ev3.Tags = tag.NewS()
ev3.Sign(sign)
if _, _, err := db.SaveEvent(ctx, ev3); err != nil {
t.Fatalf("save ev3: %v", err)
}
// 4. No content words, but tag value has searchable words: "delta epsilon"
ev4 := event.New()
ev4.Kind = kind.TextNote.K
ev4.Pubkey = sign.Pub()
ev4.CreatedAt = now - 2
ev4.Content = []byte("")
ev4.Tags = tag.NewS()
*ev4.Tags = append(*ev4.Tags, tag.NewFromAny("t", "delta epsilon"))
ev4.Sign(sign)
if _, _, err := db.SaveEvent(ctx, ev4); err != nil {
t.Fatalf("save ev4: %v", err)
}
// 5. Another event with both content and tag tokens for ordering checks
ev5 := event.New()
ev5.Kind = kind.TextNote.K
ev5.Pubkey = sign.Pub()
ev5.CreatedAt = now - 1
ev5.Content = []byte("alpha DELTA mixed-case and link http://foo.bar")
ev5.Tags = tag.NewS()
*ev5.Tags = append(*ev5.Tags, tag.NewFromAny("t", "zeta"))
ev5.Sign(sign)
if _, _, err := db.SaveEvent(ctx, ev5); err != nil {
t.Fatalf("save ev5: %v", err)
}
// Small sleep to ensure created_at ordering is the only factor
time.Sleep(5 * time.Millisecond)
// Helper to run a search and return IDs
run := func(q string) ([]*event.E, error) {
f := &filter.F{Search: []byte(q)}
return db.QueryEvents(ctx, f)
}
// Single-term search: alpha -> should match ev1 and ev5 ordered by created_at desc (ev5 newer)
if evs, err := run("alpha"); err != nil {
t.Fatalf("search alpha: %v", err)
} else {
if len(evs) != 2 {
t.Fatalf("alpha expected 2 results, got %d", len(evs))
}
if !(evs[0].CreatedAt >= evs[1].CreatedAt) {
t.Fatalf("results not ordered by created_at desc")
}
}
// Overlap term beta -> ev1 and ev2
if evs, err := run("beta"); err != nil {
t.Fatalf("search beta: %v", err)
} else if len(evs) != 2 {
t.Fatalf("beta expected 2 results, got %d", len(evs))
}
// Unique term gamma -> only ev2
if evs, err := run("gamma"); err != nil {
t.Fatalf("search gamma: %v", err)
} else if len(evs) != 1 {
t.Fatalf("gamma expected 1 result, got %d", len(evs))
}
// URL terms should be ignored: example -> appears only inside URLs in ev1/ev3; tokenizer ignores URLs so expect 0
if evs, err := run("example"); err != nil {
t.Fatalf("search example: %v", err)
} else if len(evs) != 0 {
t.Fatalf("example expected 0 results (URL tokens ignored), got %d", len(evs))
}
// Tag words searchable: delta should match ev4 and ev5 (delta in tag for ev4, in content for ev5)
if evs, err := run("delta"); err != nil {
t.Fatalf("search delta: %v", err)
} else if len(evs) != 2 {
t.Fatalf("delta expected 2 results, got %d", len(evs))
}
// Very short token ignored: single-letter should yield 0
if evs, err := run("a"); err != nil {
t.Fatalf("search short token: %v", err)
} else if len(evs) != 0 {
t.Fatalf("single-letter expected 0 results, got %d", len(evs))
}
// 64-char hex should be ignored
hex64 := "0123456789abcdef0123456789abcdef0123456789abcdef0123456789abcdef"
if evs, err := run(hex64); err != nil {
t.Fatalf("search hex64: %v", err)
} else if len(evs) != 0 {
t.Fatalf("hex64 expected 0 results, got %d", len(evs))
}
// nostr: scheme ignored
if evs, err := run("nostr:nevent1qqqqq"); err != nil {
t.Fatalf("search nostr: %v", err)
} else if len(evs) != 0 {
t.Fatalf("nostr: expected 0 results, got %d", len(evs))
}
}
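
Assuming the test file lives alongside the package at pkg/database, it can be run in isolation from the repository root with:

go test ./pkg/database -run TestQueryEventsBySearchTerms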

View File

@@ -13,7 +13,9 @@ import (
// QueryForIds retrieves a list of IdPkTs based on the provided filter.
// It supports filtering by ranges and tags but disallows filtering by Ids.
// Results are sorted by timestamp in reverse chronological order.
// Results are sorted by timestamp in reverse chronological order by default.
// When a search query is present, results are ranked by a 50/50 blend of
// match count (how many distinct search terms matched) and recency.
// Returns an error if the filter contains Ids or if any operation fails.
func (d *D) QueryForIds(c context.Context, f *filter.F) (
idPkTs []*store.IdPkTs, err error,
@@ -29,6 +31,9 @@ func (d *D) QueryForIds(c context.Context, f *filter.F) (
}
var results []*store.IdPkTs
var founds []*types.Uint40
// When searching, we want to count how many index ranges (search terms)
// matched each note. We'll track counts by serial.
counts := make(map[uint64]int)
for _, idx := range idxs {
if founds, err = d.GetSerialsByRange(idx); chk.E(err) {
return
@@ -37,6 +42,12 @@ func (d *D) QueryForIds(c context.Context, f *filter.F) (
if tmp, err = d.GetFullIdPubkeyBySerials(founds); chk.E(err) {
return
}
// If this query is driven by Search terms, increment count per serial
if len(f.Search) > 0 {
for _, v := range tmp {
counts[v.Ser]++
}
}
results = append(results, tmp...)
}
// deduplicate in case this somehow happened (such as two or more
@@ -48,12 +59,58 @@ func (d *D) QueryForIds(c context.Context, f *filter.F) (
idPkTs = append(idPkTs, idpk)
}
}
// sort results by timestamp in reverse chronological order
sort.Slice(
idPkTs, func(i, j int) bool {
return idPkTs[i].Ts > idPkTs[j].Ts
},
)
if len(f.Search) == 0 {
// No search query: sort by timestamp in reverse chronological order
sort.Slice(
idPkTs, func(i, j int) bool {
return idPkTs[i].Ts > idPkTs[j].Ts
},
)
} else {
// Search query present: blend match count relevance with recency (50/50)
// Normalize both match count and timestamp to [0,1] and compute score.
var maxCount int
var minTs, maxTs int64
if len(idPkTs) > 0 {
minTs, maxTs = idPkTs[0].Ts, idPkTs[0].Ts
}
for _, v := range idPkTs {
if c := counts[v.Ser]; c > maxCount {
maxCount = c
}
if v.Ts < minTs {
minTs = v.Ts
}
if v.Ts > maxTs {
maxTs = v.Ts
}
}
// Precompute denominator to avoid div-by-zero
tsSpan := maxTs - minTs
if tsSpan <= 0 {
tsSpan = 1
}
if maxCount <= 0 {
maxCount = 1
}
sort.Slice(
idPkTs, func(i, j int) bool {
ci := float64(counts[idPkTs[i].Ser]) / float64(maxCount)
cj := float64(counts[idPkTs[j].Ser]) / float64(maxCount)
ai := float64(idPkTs[i].Ts-minTs) / float64(tsSpan)
aj := float64(idPkTs[j].Ts-minTs) / float64(tsSpan)
si := 0.5*ci + 0.5*ai
sj := 0.5*cj + 0.5*aj
if si == sj {
// tie-break by recency
return idPkTs[i].Ts > idPkTs[j].Ts
}
return si > sj
},
)
}
if f.Limit != nil && len(idPkTs) > int(*f.Limit) {
idPkTs = idPkTs[:*f.Limit]
}

View File

@@ -9,10 +9,12 @@ import (
"github.com/dgraph-io/badger/v4"
"lol.mleku.dev/chk"
"lol.mleku.dev/log"
"next.orly.dev/pkg/database/indexes"
"next.orly.dev/pkg/database/indexes/types"
"next.orly.dev/pkg/encoders/event"
"next.orly.dev/pkg/encoders/filter"
"next.orly.dev/pkg/encoders/hex"
"next.orly.dev/pkg/encoders/kind"
"next.orly.dev/pkg/encoders/tag"
)
@@ -230,10 +232,10 @@ func (d *D) SaveEvent(c context.Context, ev *event.E) (kc, vc int, err error) {
return
},
)
// log.T.F(
// "total data written: %d bytes keys %d bytes values for event ID %s", kc,
// vc, hex.Enc(ev.ID),
// )
log.T.F(
"total data written: %d bytes keys %d bytes values for event ID %s", kc,
vc, hex.Enc(ev.ID),
)
// log.T.C(
// func() string {
// return fmt.Sprintf("event:\n%s\n", ev.Serialize())

pkg/database/tokenize.go Normal file
View File

@@ -0,0 +1,178 @@
package database
import (
"strings"
"unicode"
sha "next.orly.dev/pkg/crypto/sha256"
)
// TokenHashes extracts unique word hashes (8-byte truncated sha256) from content.
// Rules:
// - Unicode-aware: words are sequences of letters or numbers.
// - Lowercased using unicode case mapping.
// - Ignore URLs (starting with http://, https://, www., or containing "://").
// - Ignore nostr: URIs and #[n] mentions.
// - Ignore words shorter than 2 runes.
// - Exclude 64-character hexadecimal strings (likely IDs/pubkeys).
func TokenHashes(content []byte) [][]byte {
s := string(content)
var out [][]byte
seen := make(map[string]struct{})
i := 0
for i < len(s) {
r, size := rune(s[i]), 1
if r >= 0x80 {
r, size = utf8DecodeRuneInString(s[i:])
}
// Skip whitespace
if unicode.IsSpace(r) {
i += size
continue
}
// Skip URLs and schemes
if hasPrefixFold(s[i:], "http://") || hasPrefixFold(s[i:], "https://") || hasPrefixFold(s[i:], "nostr:") || hasPrefixFold(s[i:], "www.") {
i = skipUntilSpace(s, i)
continue
}
// If token contains "://" ahead, treat as URL and skip to space
if j := strings.Index(s[i:], "://"); j == 0 || (j > 0 && isWordStart(r)) {
// Only if it's at start of token
before := s[i : i+j]
if len(before) == 0 || allAlphaNum(before) {
i = skipUntilSpace(s, i)
continue
}
}
// Skip #[n] mentions
if r == '#' && i+size < len(s) && s[i+size] == '[' {
end := strings.IndexByte(s[i:], ']')
if end >= 0 {
i += end + 1
continue
}
}
// Collect a word
start := i
var runes []rune
for i < len(s) {
r2, size2 := rune(s[i]), 1
if r2 >= 0x80 {
r2, size2 = utf8DecodeRuneInString(s[i:])
}
if unicode.IsLetter(r2) || unicode.IsNumber(r2) {
runes = append(runes, unicode.ToLower(r2))
i += size2
continue
}
break
}
// If we didn't consume any rune for a word, advance by one rune to avoid stalling
if i == start {
_, size2 := utf8DecodeRuneInString(s[i:])
i += size2
continue
}
if len(runes) >= 2 {
w := string(runes)
// Exclude 64-char hex strings
if isHex64(w) {
continue
}
if _, ok := seen[w]; !ok {
seen[w] = struct{}{}
h := sha.Sum256([]byte(w))
out = append(out, h[:8])
}
}
}
return out
}
func hasPrefixFold(s, prefix string) bool {
if len(s) < len(prefix) {
return false
}
for i := 0; i < len(prefix); i++ {
c := s[i]
p := prefix[i]
if c == p {
continue
}
// ASCII case-insensitive
if 'A' <= c && c <= 'Z' {
c = c - 'A' + 'a'
}
if 'A' <= p && p <= 'Z' {
p = p - 'A' + 'a'
}
if c != p {
return false
}
}
return true
}
func skipUntilSpace(s string, i int) int {
for i < len(s) {
r, size := rune(s[i]), 1
if r >= 0x80 {
r, size = utf8DecodeRuneInString(s[i:])
}
if unicode.IsSpace(r) {
return i
}
i += size
}
return i
}
func allAlphaNum(s string) bool {
for _, r := range s {
if !(unicode.IsLetter(r) || unicode.IsNumber(r)) {
return false
}
}
return true
}
func isWordStart(r rune) bool { return unicode.IsLetter(r) || unicode.IsNumber(r) }
// utf8DecodeRuneInString decodes the first rune in s, delegating to the
// standard library so multibyte sequences yield the correct rune and size.
func utf8DecodeRuneInString(s string) (r rune, size int) {
return utf8.DecodeRuneInString(s)
}
// isHex64 returns true if s is exactly 64 hex characters (0-9, a-f, A-F)
func isHex64(s string) bool {
if len(s) != 64 {
return false
}
for i := 0; i < 64; i++ {
c := s[i]
if c >= '0' && c <= '9' {
continue
}
if c >= 'a' && c <= 'f' {
continue
}
if c >= 'A' && c <= 'F' {
continue
}
return false
}
return true
}
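
A small usage sketch of the tokenizer; it assumes the database package is importable at next.orly.dev/pkg/database, and only the token count is checked since the returned values are truncated hashes:

package main

import (
	"fmt"

	"next.orly.dev/pkg/database"
)

func main() {
	// The URL and the #[0] mention are skipped per the rules above,
	// leaving "alpha" and "beta".
	hashes := database.TokenHashes([]byte("Alpha beta https://example.com #[0]"))
	fmt.Println(len(hashes)) // 2
}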

View File

@@ -1 +1 @@
v0.8.7
v0.9.1