Add spider functionality for relay crawling, marker management, and new SpiderMode config.

- Introduced the `spider` package for relay crawling, including periodic tasks and one-time sync capabilities.
- Added `SetMarker`, `GetMarker`, `HasMarker`, and `DeleteMarker` methods in the database for marker management.
- Updated configuration with `SpiderMode` and `SpiderFrequency` options to enable and customize spider behavior.
- Integrated `spider` initialization into the main application flow.
- Improved tag handling, NIP-70 compliance, and protected tag validation in event processing.
- Removed unnecessary logging and replaced `errorf` with `fmt.Errorf` for better error handling.
- Incremented version to `v0.5.0`.
This commit is contained in:
2025-09-20 13:46:22 +01:00
parent 4532def9f5
commit 20fbce9263
11 changed files with 601 additions and 133 deletions

View File

@@ -23,23 +23,25 @@ import (
// and default values. It defines parameters for app behaviour, storage
// locations, logging, and network settings used across the relay service.
type C struct {
AppName string `env:"ORLY_APP_NAME" usage:"set a name to display on information about the relay" default:"ORLY"`
DataDir string `env:"ORLY_DATA_DIR" usage:"storage location for the event store" default:"~/.local/share/ORLY"`
Listen string `env:"ORLY_LISTEN" default:"0.0.0.0" usage:"network listen address"`
Port int `env:"ORLY_PORT" default:"3334" usage:"port to listen on"`
HealthPort int `env:"ORLY_HEALTH_PORT" default:"0" usage:"optional health check HTTP port; 0 disables"`
EnableShutdown bool `env:"ORLY_ENABLE_SHUTDOWN" default:"false" usage:"if true, expose /shutdown on the health port to gracefully stop the process (for profiling)"`
LogLevel string `env:"ORLY_LOG_LEVEL" default:"info" usage:"relay log level: fatal error warn info debug trace"`
DBLogLevel string `env:"ORLY_DB_LOG_LEVEL" default:"info" usage:"database log level: fatal error warn info debug trace"`
LogToStdout bool `env:"ORLY_LOG_TO_STDOUT" default:"false" usage:"log to stdout instead of stderr"`
Pprof string `env:"ORLY_PPROF" usage:"enable pprof in modes: cpu,memory,allocation,heap,block,goroutine,threadcreate,mutex"`
PprofPath string `env:"ORLY_PPROF_PATH" usage:"optional directory to write pprof profiles into (inside container); default is temporary dir"`
PprofHTTP bool `env:"ORLY_PPROF_HTTP" default:"false" usage:"if true, expose net/http/pprof on port 6060"`
OpenPprofWeb bool `env:"ORLY_OPEN_PPROF_WEB" default:"false" usage:"if true, automatically open the pprof web viewer when profiling is enabled"`
IPWhitelist []string `env:"ORLY_IP_WHITELIST" usage:"comma-separated list of IP addresses to allow access from, matches on prefixes to allow private subnets, eg 10.0.0 = 10.0.0.0/8"`
Admins []string `env:"ORLY_ADMINS" usage:"comma-separated list of admin npubs"`
Owners []string `env:"ORLY_OWNERS" usage:"comma-separated list of owner npubs, who have full control of the relay for wipe and restart and other functions"`
ACLMode string `env:"ORLY_ACL_MODE" usage:"ACL mode: follows,none" default:"none"`
AppName string `env:"ORLY_APP_NAME" usage:"set a name to display on information about the relay" default:"ORLY"`
DataDir string `env:"ORLY_DATA_DIR" usage:"storage location for the event store" default:"~/.local/share/ORLY"`
Listen string `env:"ORLY_LISTEN" default:"0.0.0.0" usage:"network listen address"`
Port int `env:"ORLY_PORT" default:"3334" usage:"port to listen on"`
HealthPort int `env:"ORLY_HEALTH_PORT" default:"0" usage:"optional health check HTTP port; 0 disables"`
EnableShutdown bool `env:"ORLY_ENABLE_SHUTDOWN" default:"false" usage:"if true, expose /shutdown on the health port to gracefully stop the process (for profiling)"`
LogLevel string `env:"ORLY_LOG_LEVEL" default:"info" usage:"relay log level: fatal error warn info debug trace"`
DBLogLevel string `env:"ORLY_DB_LOG_LEVEL" default:"info" usage:"database log level: fatal error warn info debug trace"`
LogToStdout bool `env:"ORLY_LOG_TO_STDOUT" default:"false" usage:"log to stdout instead of stderr"`
Pprof string `env:"ORLY_PPROF" usage:"enable pprof in modes: cpu,memory,allocation,heap,block,goroutine,threadcreate,mutex"`
PprofPath string `env:"ORLY_PPROF_PATH" usage:"optional directory to write pprof profiles into (inside container); default is temporary dir"`
PprofHTTP bool `env:"ORLY_PPROF_HTTP" default:"false" usage:"if true, expose net/http/pprof on port 6060"`
OpenPprofWeb bool `env:"ORLY_OPEN_PPROF_WEB" default:"false" usage:"if true, automatically open the pprof web viewer when profiling is enabled"`
IPWhitelist []string `env:"ORLY_IP_WHITELIST" usage:"comma-separated list of IP addresses to allow access from, matches on prefixes to allow private subnets, eg 10.0.0 = 10.0.0.0/8"`
Admins []string `env:"ORLY_ADMINS" usage:"comma-separated list of admin npubs"`
Owners []string `env:"ORLY_OWNERS" usage:"comma-separated list of owner npubs, who have full control of the relay for wipe and restart and other functions"`
ACLMode string `env:"ORLY_ACL_MODE" usage:"ACL mode: follows,none" default:"none"`
SpiderMode string `env:"ORLY_SPIDER_MODE" usage:"spider mode: none,follow" default:"none"`
SpiderFrequency time.Duration `env:"ORLY_SPIDER_FREQUENCY" usage:"spider frequency in seconds" default:"1h"`
}
// New creates and initializes a new configuration object for the relay

View File

@@ -103,6 +103,20 @@ func (l *Listener) HandleEvent(msg []byte) (err error) {
// user has write access or better, continue
// log.D.F("user has %s access", accessLevel)
}
// check for protected tag (NIP-70)
protectedTag := env.E.Tags.GetFirst([]byte("-"))
if protectedTag != nil && acl.Registry.Active.Load() != "none" {
// check that the pubkey of the event matches the authed pubkey
if !utils.FastEqual(l.authedPubkey.Load(), env.E.Pubkey) {
if err = Ok.Blocked(
l, env,
"protected tag may only be published by user authed to the same pubkey",
); chk.E(err) {
return
}
return
}
}
// if the event is a delete, process the delete
if env.E.Kind == kind.EventDeletion.K {
if err = l.HandleDelete(env); err != nil {

View File

@@ -1,9 +1,9 @@
package app
import (
"fmt"
"lol.mleku.dev/chk"
"lol.mleku.dev/errorf"
"lol.mleku.dev/log"
"next.orly.dev/pkg/encoders/envelopes"
"next.orly.dev/pkg/encoders/envelopes/authenvelope"
"next.orly.dev/pkg/encoders/envelopes/closeenvelope"
@@ -13,7 +13,7 @@ import (
)
func (l *Listener) HandleMessage(msg []byte, remote string) {
log.D.F("%s received message:\n%s", remote, msg)
// log.D.F("%s received message:\n%s", remote, msg)
var err error
var t string
var rem []byte
@@ -32,7 +32,7 @@ func (l *Listener) HandleMessage(msg []byte, remote string) {
// log.D.F("authenvelope: %s %s", remote, rem)
err = l.HandleAuth(rem)
default:
err = errorf.E("unknown envelope type %s\n%s", t, rem)
err = fmt.Errorf("unknown envelope type %s\n%s", t, rem)
}
}
if err != nil {
@@ -43,7 +43,7 @@ func (l *Listener) HandleMessage(msg []byte, remote string) {
// )
// },
// )
if err = noticeenvelope.NewFrom(err.Error()).Write(l); chk.E(err) {
if err = noticeenvelope.NewFrom(err.Error()).Write(l); err != nil {
return
}
}

View File

@@ -9,7 +9,7 @@ import (
"github.com/dgraph-io/badger/v4"
"lol.mleku.dev/chk"
"lol.mleku.dev/log"
acl "next.orly.dev/pkg/acl"
"next.orly.dev/pkg/acl"
"next.orly.dev/pkg/encoders/envelopes/authenvelope"
"next.orly.dev/pkg/encoders/envelopes/closedenvelope"
"next.orly.dev/pkg/encoders/envelopes/eoseenvelope"
@@ -22,21 +22,21 @@ import (
"next.orly.dev/pkg/encoders/kind"
"next.orly.dev/pkg/encoders/reason"
"next.orly.dev/pkg/encoders/tag"
utils "next.orly.dev/pkg/utils"
"next.orly.dev/pkg/utils"
"next.orly.dev/pkg/utils/normalize"
"next.orly.dev/pkg/utils/pointers"
)
func (l *Listener) HandleReq(msg []byte) (err error) {
log.T.F("HandleReq: START processing from %s\n%s\n", l.remote, msg)
var rem []byte
// log.T.F("HandleReq: START processing from %s\n%s\n", l.remote, msg)
// var rem []byte
env := reqenvelope.New()
if rem, err = env.Unmarshal(msg); chk.E(err) {
if _, err = env.Unmarshal(msg); chk.E(err) {
return normalize.Error.Errorf(err.Error())
}
if len(rem) > 0 {
log.I.F("REQ extra bytes: '%s'", rem)
}
// if len(rem) > 0 {
// log.I.F("REQ extra bytes: '%s'", rem)
// }
// send a challenge to the client to auth if an ACL is active
if acl.Registry.Active.Load() != "none" {
if err = authenvelope.NewChallengeWith(l.challenge.Load()).
@@ -57,59 +57,59 @@ func (l *Listener) HandleReq(msg []byte) (err error) {
return
default:
// user has read access or better, continue
log.D.F("user has %s access", accessLevel)
// log.D.F("user has %s access", accessLevel)
}
var events event.S
for _, f := range *env.Filters {
idsLen := 0
kindsLen := 0
authorsLen := 0
tagsLen := 0
if f != nil {
if f.Ids != nil {
idsLen = f.Ids.Len()
}
if f.Kinds != nil {
kindsLen = f.Kinds.Len()
}
if f.Authors != nil {
authorsLen = f.Authors.Len()
}
if f.Tags != nil {
tagsLen = f.Tags.Len()
}
}
log.T.F(
"REQ %s: filter summary ids=%d kinds=%d authors=%d tags=%d",
env.Subscription, idsLen, kindsLen, authorsLen, tagsLen,
)
// idsLen := 0
// kindsLen := 0
// authorsLen := 0
// tagsLen := 0
// if f != nil {
// if f.Ids != nil {
// idsLen = f.Ids.Len()
// }
// if f.Kinds != nil {
// kindsLen = f.Kinds.Len()
// }
// if f.Authors != nil {
// authorsLen = f.Authors.Len()
// }
// if f.Tags != nil {
// tagsLen = f.Tags.Len()
// }
// }
// log.T.F(
// "REQ %s: filter summary ids=%d kinds=%d authors=%d tags=%d",
// env.Subscription, idsLen, kindsLen, authorsLen, tagsLen,
// )
if f != nil && f.Authors != nil && f.Authors.Len() > 0 {
var authors []string
for _, a := range f.Authors.T {
authors = append(authors, hex.Enc(a))
}
log.T.F("REQ %s: authors=%v", env.Subscription, authors)
// log.T.F("REQ %s: authors=%v", env.Subscription, authors)
}
if f != nil && f.Kinds != nil && f.Kinds.Len() > 0 {
log.T.F("REQ %s: kinds=%v", env.Subscription, f.Kinds.ToUint16())
}
if f != nil && f.Ids != nil && f.Ids.Len() > 0 {
var ids []string
for _, id := range f.Ids.T {
ids = append(ids, hex.Enc(id))
}
var lim any
if pointers.Present(f.Limit) {
lim = *f.Limit
} else {
lim = nil
}
log.T.F(
"REQ %s: ids filter count=%d ids=%v limit=%v", env.Subscription,
f.Ids.Len(), ids, lim,
)
}
if pointers.Present(f.Limit) {
// if f != nil && f.Kinds != nil && f.Kinds.Len() > 0 {
// log.T.F("REQ %s: kinds=%v", env.Subscription, f.Kinds.ToUint16())
// }
// if f != nil && f.Ids != nil && f.Ids.Len() > 0 {
// var ids []string
// for _, id := range f.Ids.T {
// ids = append(ids, hex.Enc(id))
// }
// // var lim any
// // if pointers.Present(f.Limit) {
// // lim = *f.Limit
// // } else {
// // lim = nil
// // }
// // log.T.F(
// // "REQ %s: ids filter count=%d ids=%v limit=%v", env.Subscription,
// // f.Ids.Len(), ids, lim,
// // )
// }
if f != nil && pointers.Present(f.Limit) {
if *f.Limit == 0 {
continue
}
@@ -119,15 +119,15 @@ func (l *Listener) HandleReq(msg []byte) (err error) {
context.Background(), 30*time.Second,
)
defer cancel()
log.T.F(
"HandleReq: About to QueryEvents for %s, main context done: %v",
l.remote, l.ctx.Err() != nil,
)
// log.T.F(
// "HandleReq: About to QueryEvents for %s, main context done: %v",
// l.remote, l.ctx.Err() != nil,
// )
if events, err = l.QueryEvents(queryCtx, f); chk.E(err) {
if errors.Is(err, badger.ErrDBClosed) {
return
}
log.T.F("HandleReq: QueryEvents error for %s: %v", l.remote, err)
// log.T.F("HandleReq: QueryEvents error for %s: %v", l.remote, err)
err = nil
}
defer func() {
@@ -135,23 +135,23 @@ func (l *Listener) HandleReq(msg []byte) (err error) {
ev.Free()
}
}()
log.T.F(
"HandleReq: QueryEvents completed for %s, found %d events",
l.remote, len(events),
)
// log.T.F(
// "HandleReq: QueryEvents completed for %s, found %d events",
// l.remote, len(events),
// )
}
var tmp event.S
privCheck:
for _, ev := range events {
if kind.IsPrivileged(ev.Kind) &&
accessLevel != "admin" { // admins can see all events
log.T.C(
func() string {
return fmt.Sprintf(
"checking privileged event %0x", ev.ID,
)
},
)
// log.T.C(
// func() string {
// return fmt.Sprintf(
// "checking privileged event %0x", ev.ID,
// )
// },
// )
pk := l.authedPubkey.Load()
if pk == nil {
continue
@@ -175,26 +175,26 @@ privCheck:
continue
}
if utils.FastEqual(pt, pk) {
log.T.C(
func() string {
return fmt.Sprintf(
"privileged event %s is for logged in pubkey %0x",
ev.ID, pk,
)
},
)
// log.T.C(
// func() string {
// return fmt.Sprintf(
// "privileged event %s is for logged in pubkey %0x",
// ev.ID, pk,
// )
// },
// )
tmp = append(tmp, ev)
continue privCheck
}
}
log.T.C(
func() string {
return fmt.Sprintf(
"privileged event %s does not contain the logged in pubkey %0x",
ev.ID, pk,
)
},
)
// log.T.C(
// func() string {
// return fmt.Sprintf(
// "privileged event %s does not contain the logged in pubkey %0x",
// ev.ID, pk,
// )
// },
// )
} else {
tmp = append(tmp, ev)
}
@@ -202,19 +202,19 @@ privCheck:
events = tmp
seen := make(map[string]struct{})
for _, ev := range events {
log.D.C(
func() string {
return fmt.Sprintf(
"REQ %s: sending EVENT id=%s kind=%d", env.Subscription,
hex.Enc(ev.ID), ev.Kind,
)
},
)
log.T.C(
func() string {
return fmt.Sprintf("event:\n%s\n", ev.Serialize())
},
)
// log.D.C(
// func() string {
// return fmt.Sprintf(
// "REQ %s: sending EVENT id=%s kind=%d", env.Subscription,
// hex.Enc(ev.ID), ev.Kind,
// )
// },
// )
// log.T.C(
// func() string {
// return fmt.Sprintf("event:\n%s\n", ev.Serialize())
// },
// )
var res *eventenvelope.Result
if res, err = eventenvelope.NewResultWith(
env.Subscription, ev,
@@ -229,7 +229,7 @@ privCheck:
}
// write the EOSE to signal to the client that all events found have been
// sent.
log.T.F("sending EOSE to %s", l.remote)
// log.T.F("sending EOSE to %s", l.remote)
if err = eoseenvelope.NewFrom(env.Subscription).
Write(l); chk.E(err) {
return
@@ -237,10 +237,10 @@ privCheck:
// if the query was for just Ids, we know there can't be any more results,
// so cancel the subscription.
cancel := true
log.T.F(
"REQ %s: computing cancel/subscription; events_sent=%d",
env.Subscription, len(events),
)
// log.T.F(
// "REQ %s: computing cancel/subscription; events_sent=%d",
// env.Subscription, len(events),
// )
var subbedFilters filter.S
for _, f := range *env.Filters {
if f.Ids.Len() < 1 {
@@ -255,10 +255,10 @@ privCheck:
}
notFounds = append(notFounds, id)
}
log.T.F(
"REQ %s: ids outstanding=%d of %d", env.Subscription,
len(notFounds), f.Ids.Len(),
)
// log.T.F(
// "REQ %s: ids outstanding=%d of %d", env.Subscription,
// len(notFounds), f.Ids.Len(),
// )
// if all were found, don't add to subbedFilters
if len(notFounds) == 0 {
continue
@@ -295,6 +295,6 @@ privCheck:
return
}
}
log.T.F("HandleReq: COMPLETED processing from %s", l.remote)
// log.T.F("HandleReq: COMPLETED processing from %s", l.remote)
return
}

View File

@@ -97,7 +97,7 @@ whitelist:
}
var typ websocket.MessageType
var msg []byte
log.T.F("waiting for message from %s", remote)
// log.T.F("waiting for message from %s", remote)
// Create a read context with timeout to prevent indefinite blocking
readCtx, readCancel := context.WithTimeout(ctx, DefaultReadTimeout)
@@ -152,7 +152,7 @@ whitelist:
writeCancel()
continue
}
log.T.F("received message from %s: %s", remote, string(msg))
// log.T.F("received message from %s: %s", remote, string(msg))
go listener.HandleMessage(msg, remote)
}
}

View File

@@ -18,6 +18,7 @@ import (
"next.orly.dev/app/config"
"next.orly.dev/pkg/acl"
"next.orly.dev/pkg/database"
"next.orly.dev/pkg/spider"
"next.orly.dev/pkg/version"
)
@@ -158,6 +159,12 @@ func main() {
}
acl.Registry.Syncer()
// Initialize and start spider functionality if enabled
spiderCtx, spiderCancel := context.WithCancel(ctx)
spiderInstance := spider.New(db, cfg, spiderCtx, spiderCancel)
spiderInstance.Start()
defer spiderInstance.Stop()
// Start HTTP pprof server if enabled
if cfg.PprofHTTP {
pprofAddr := fmt.Sprintf("%s:%d", cfg.Listen, 6060)

View File

@@ -360,6 +360,16 @@ func (f *Follows) Syncer() {
f.updated <- struct{}{}
}
// GetFollowedPubkeys returns a copy of the followed pubkeys list
func (f *Follows) GetFollowedPubkeys() [][]byte {
f.followsMx.RLock()
defer f.followsMx.RUnlock()
followedPubkeys := make([][]byte, len(f.follows))
copy(followedPubkeys, f.follows)
return followedPubkeys
}
func init() {
log.T.F("registering follows ACL")
Registry.Register(new(Follows))

62
pkg/database/markers.go Normal file
View File

@@ -0,0 +1,62 @@
package database
import (
"github.com/dgraph-io/badger/v4"
"lol.mleku.dev/chk"
)
const (
markerPrefix = "MARKER:"
)
// SetMarker stores an arbitrary marker in the database
func (d *D) SetMarker(key string, value []byte) (err error) {
markerKey := []byte(markerPrefix + key)
err = d.Update(func(txn *badger.Txn) error {
return txn.Set(markerKey, value)
})
return
}
// GetMarker retrieves an arbitrary marker from the database
func (d *D) GetMarker(key string) (value []byte, err error) {
markerKey := []byte(markerPrefix + key)
err = d.View(func(txn *badger.Txn) error {
item, err := txn.Get(markerKey)
if err != nil {
return err
}
value, err = item.ValueCopy(nil)
return err
})
return
}
// HasMarker checks if a marker exists in the database
func (d *D) HasMarker(key string) (exists bool) {
markerKey := []byte(markerPrefix + key)
err := d.View(func(txn *badger.Txn) error {
_, err := txn.Get(markerKey)
return err
})
exists = !chk.E(err)
return
}
// DeleteMarker removes a marker from the database
func (d *D) DeleteMarker(key string) (err error) {
markerKey := []byte(markerPrefix + key)
err = d.Update(func(txn *badger.Txn) error {
return txn.Delete(markerKey)
})
return
}

View File

@@ -24,7 +24,8 @@ func NewSWithCap(c int) (s *S) {
func (s *S) Len() int {
if s == nil {
panic("tags cannot be used without initialization")
return 0
// panic("tags cannot be used without initialization")
}
return len(*s)
}

372
pkg/spider/spider.go Normal file
View File

@@ -0,0 +1,372 @@
package spider
import (
"context"
"strconv"
"strings"
"time"
"lol.mleku.dev/chk"
"lol.mleku.dev/log"
"next.orly.dev/app/config"
"next.orly.dev/pkg/acl"
"next.orly.dev/pkg/database"
"next.orly.dev/pkg/database/indexes/types"
"next.orly.dev/pkg/encoders/filter"
"next.orly.dev/pkg/encoders/kind"
"next.orly.dev/pkg/encoders/tag"
"next.orly.dev/pkg/encoders/timestamp"
"next.orly.dev/pkg/protocol/ws"
"next.orly.dev/pkg/utils/normalize"
)
const (
OneTimeSpiderSyncMarker = "spider_one_time_sync_completed"
SpiderLastScanMarker = "spider_last_scan_time"
)
type Spider struct {
db *database.D
cfg *config.C
ctx context.Context
cancel context.CancelFunc
}
func New(
db *database.D, cfg *config.C, ctx context.Context,
cancel context.CancelFunc,
) *Spider {
return &Spider{
db: db,
cfg: cfg,
ctx: ctx,
cancel: cancel,
}
}
// Start initializes the spider functionality based on configuration
func (s *Spider) Start() {
if s.cfg.SpiderMode != "follows" {
log.D.Ln("Spider mode is not set to 'follows', skipping spider functionality")
return
}
log.I.Ln("Starting spider in follow mode")
// Check if one-time sync has been completed
if !s.db.HasMarker(OneTimeSpiderSyncMarker) {
log.I.Ln("Performing one-time spider sync back one month")
go s.performOneTimeSync()
} else {
log.D.Ln("One-time spider sync already completed, skipping")
}
// Start periodic scanning
go s.startPeriodicScanning()
}
// performOneTimeSync performs the initial sync going back one month
func (s *Spider) performOneTimeSync() {
defer func() {
// Mark the one-time sync as completed
timestamp := strconv.FormatInt(time.Now().Unix(), 10)
if err := s.db.SetMarker(
OneTimeSpiderSyncMarker, []byte(timestamp),
); err != nil {
log.E.F("Failed to set one-time sync marker: %v", err)
} else {
log.I.Ln("One-time spider sync completed and marked")
}
}()
// Calculate the time one month ago
oneMonthAgo := time.Now().AddDate(0, -1, 0)
log.I.F("Starting one-time spider sync from %v", oneMonthAgo)
// Perform the sync (placeholder - would need actual implementation based on follows)
if err := s.performSync(oneMonthAgo, time.Now()); err != nil {
log.E.F("One-time spider sync failed: %v", err)
return
}
log.I.Ln("One-time spider sync completed successfully")
}
// startPeriodicScanning starts the regular scanning process
func (s *Spider) startPeriodicScanning() {
ticker := time.NewTicker(s.cfg.SpiderFrequency)
defer ticker.Stop()
log.I.F("Starting periodic spider scanning every %v", s.cfg.SpiderFrequency)
for {
select {
case <-s.ctx.Done():
log.D.Ln("Spider periodic scanning stopped due to context cancellation")
return
case <-ticker.C:
s.performPeriodicScan()
}
}
}
// performPeriodicScan performs the regular scan of the last two hours (double the frequency window)
func (s *Spider) performPeriodicScan() {
// Calculate the scanning window (double the frequency period)
scanWindow := s.cfg.SpiderFrequency * 2
scanStart := time.Now().Add(-scanWindow)
scanEnd := time.Now()
log.D.F(
"Performing periodic spider scan from %v to %v (window: %v)", scanStart,
scanEnd, scanWindow,
)
if err := s.performSync(scanStart, scanEnd); err != nil {
log.E.F("Periodic spider scan failed: %v", err)
return
}
// Update the last scan marker
timestamp := strconv.FormatInt(time.Now().Unix(), 10)
if err := s.db.SetMarker(
SpiderLastScanMarker, []byte(timestamp),
); err != nil {
log.E.F("Failed to update last scan marker: %v", err)
}
log.D.F("Periodic spider scan completed successfully")
}
// performSync performs the actual sync operation for the given time range
func (s *Spider) performSync(startTime, endTime time.Time) error {
log.D.F(
"Spider sync from %v to %v - starting implementation", startTime,
endTime,
)
// 1. Check ACL mode is set to "follows"
if s.cfg.ACLMode != "follows" {
log.D.F(
"Spider sync skipped - ACL mode is not 'follows' (current: %s)",
s.cfg.ACLMode,
)
return nil
}
// 2. Get the list of followed users from the ACL system
followedPubkeys, err := s.getFollowedPubkeys()
if err != nil {
return err
}
if len(followedPubkeys) == 0 {
log.D.Ln("Spider sync: no followed pubkeys found")
return nil
}
log.D.F("Spider sync: found %d followed pubkeys", len(followedPubkeys))
// 3. Discover relay lists from followed users
relayURLs, err := s.discoverRelays(followedPubkeys)
if err != nil {
return err
}
if len(relayURLs) == 0 {
log.W.Ln("Spider sync: no relays discovered from followed users")
return nil
}
log.I.F("Spider sync: discovered %d relay URLs", len(relayURLs))
// 4. Query each relay for events from followed pubkeys in the time range
eventsFound := 0
for _, relayURL := range relayURLs {
count, err := s.queryRelayForEvents(
relayURL, followedPubkeys, startTime, endTime,
)
if err != nil {
log.E.F("Spider sync: error querying relay %s: %v", relayURL, err)
continue
}
eventsFound += count
}
log.I.F(
"Spider sync completed: found %d new events from %d relays",
eventsFound, len(relayURLs),
)
return nil
}
// getFollowedPubkeys retrieves the list of followed pubkeys from the ACL system
func (s *Spider) getFollowedPubkeys() ([][]byte, error) {
// Access the ACL registry to get the current ACL instance
var followedPubkeys [][]byte
// Get all ACL instances and find the active one
for _, aclInstance := range acl.Registry.ACL {
if aclInstance.Type() == acl.Registry.Active.Load() {
// Cast to *Follows to access the follows field
if followsACL, ok := aclInstance.(*acl.Follows); ok {
followedPubkeys = followsACL.GetFollowedPubkeys()
break
}
}
}
return followedPubkeys, nil
}
// discoverRelays discovers relay URLs from kind 10002 events of followed users
func (s *Spider) discoverRelays(followedPubkeys [][]byte) ([]string, error) {
seen := make(map[string]struct{})
var urls []string
for _, pubkey := range followedPubkeys {
// Query for kind 10002 (RelayListMetadata) events from this pubkey
fl := &filter.F{
Authors: tag.NewFromAny(pubkey),
Kinds: kind.NewS(kind.New(kind.RelayListMetadata.K)),
}
idxs, err := database.GetIndexesFromFilter(fl)
if chk.E(err) {
continue
}
var sers types.Uint40s
for _, idx := range idxs {
s, err := s.db.GetSerialsByRange(idx)
if chk.E(err) {
continue
}
sers = append(sers, s...)
}
for _, ser := range sers {
ev, err := s.db.FetchEventBySerial(ser)
if chk.E(err) || ev == nil {
continue
}
// Extract relay URLs from 'r' tags
for _, v := range ev.Tags.GetAll([]byte("r")) {
u := string(v.Value())
n := string(normalize.URL(u))
if n == "" {
continue
}
if _, ok := seen[n]; ok {
continue
}
seen[n] = struct{}{}
urls = append(urls, n)
}
}
}
return urls, nil
}
// queryRelayForEvents connects to a relay and queries for events from followed pubkeys
func (s *Spider) queryRelayForEvents(
relayURL string, followedPubkeys [][]byte, startTime, endTime time.Time,
) (int, error) {
log.T.F("Spider sync: querying relay %s", relayURL)
// Connect to the relay with a timeout context
ctx, cancel := context.WithTimeout(s.ctx, 30*time.Second)
defer cancel()
client, err := ws.RelayConnect(ctx, relayURL)
if err != nil {
return 0, err
}
defer client.Close()
// Create filter for the time range and followed pubkeys
f := &filter.F{
Authors: tag.NewFromBytesSlice(followedPubkeys...),
Since: timestamp.FromUnix(startTime.Unix()),
Until: timestamp.FromUnix(endTime.Unix()),
Limit: func() *uint { l := uint(1000); return &l }(), // Limit to avoid overwhelming
}
// Subscribe to get events
sub, err := client.Subscribe(ctx, filter.NewS(f))
if err != nil {
return 0, err
}
defer sub.Unsub()
eventsCount := 0
eventsSaved := 0
timeout := time.After(10 * time.Second) // Timeout for receiving events
for {
select {
case <-ctx.Done():
log.T.F(
"Spider sync: context done for relay %s, saved %d/%d events",
relayURL, eventsSaved, eventsCount,
)
return eventsSaved, nil
case <-timeout:
log.T.F(
"Spider sync: timeout for relay %s, saved %d/%d events",
relayURL, eventsSaved, eventsCount,
)
return eventsSaved, nil
case <-sub.EndOfStoredEvents:
log.T.F(
"Spider sync: end of stored events for relay %s, saved %d/%d events",
relayURL, eventsSaved, eventsCount,
)
return eventsSaved, nil
case ev := <-sub.Events:
if ev == nil {
continue
}
eventsCount++
// Verify the event signature
if ok, err := ev.Verify(); !ok || err != nil {
log.T.F(
"Spider sync: invalid event signature from relay %s",
relayURL,
)
ev.Free()
continue
}
// Save the event to the database
if _, _, err := s.db.SaveEvent(s.ctx, ev); err != nil {
if !strings.HasPrefix(err.Error(), "blocked:") {
log.T.F(
"Spider sync: error saving event from relay %s: %v",
relayURL, err,
)
}
// Event might already exist, which is fine for deduplication
} else {
eventsSaved++
if eventsSaved%10 == 0 {
log.T.F(
"Spider sync: saved %d events from relay %s",
eventsSaved, relayURL,
)
}
}
ev.Free()
}
}
}
// Stop stops the spider functionality
func (s *Spider) Stop() {
log.D.Ln("Stopping spider")
s.cancel()
}

View File

@@ -1 +1 @@
v0.4.9
v0.5.0