Skip to content
Snippets Groups Projects
Unverified Commit c1297afe authored by Andrew McDermott's avatar Andrew McDermott Committed by GitHub
Browse files

Prevent lost updates in IfaceNameForIndexAndMAC (#716)


* fix(ifaces): avoid potential lost update in IfaceNameForIndexAndMAC

IfaceNameForIndexAndMAC reads the interface cache under RLock, and if
no match is found, performs a syscall fallback (net.InterfaceByIndex)
followed by an unconditional write under Lock.

This creates a potential lost update scenario under contention:

  1. Goroutine A reads r.ifaces[idx][mac], sees missing
  2. Goroutine B does the same
  3. Both perform fallback syscall
  4. A acquires Lock and inserts result
  5. B acquires Lock and overwrites A’s entry

This change avoids that risk by applying double-checked locking: the
map is re-checked under Lock before any write occurs.

No test has been added. The code is already synchronised and correct
at the memory level; the issue is about preserving consistency in
concurrent insertions, and reproducing it reliably would require
coordinated thread interleaving.

Signed-off-by: default avatarAndrew McDermott <amcdermo@redhat.com>

* Add failing test-race

* fix(ifaces): eliminate race condition in IfaceNameForIndexAndMAC

Eliminate a critical read-after-unlock race condition where the
function releases the RLock but continues accessing macsMap without
protection, while other goroutines could concurrently modify the same
map.

This creates a classic data race between:
- Reader threads accessing macsMap (len, iteration, lookup)
- Writer threads modifying the map via Subscribe() or fallback updates

Hold the RLock throughout the entire cache access phase, only
releasing it before each return or before the syscall fallback.

Signed-off-by: default avatarAndrew McDermott <amcdermo@redhat.com>

* Add test for race condition

* Extract cache lookup function

* fix(ifaces): eliminate race condition in IfaceNameForIndexAndMAC

Fix a critical read-after-unlock race condition where the function
released the RLock but continued accessing macsMap without protection,
while other goroutines could concurrently modify the same map.

The race occurred between:
- Reader threads accessing macsMap (len, iteration, lookup)
- Writer threads modifying the map via Subscribe() or fallback updates

Hold the RLock throughout the entire cache access phase, releasing it
only before each return or before the syscall fallback. Use individual
unlock calls rather than defer to prevent deadlock, since RWMutex does
not support lock upgrades from read to write.

Add comment documenting this design choice to prevent future
"optimisations" that would introduce deadlock.

Signed-off-by: default avatarAndrew McDermott <amcdermo@redhat.com>

* fix leaking fd in test

---------

Signed-off-by: default avatarAndrew McDermott <amcdermo@redhat.com>
Co-authored-by: default avatarJoel Takvorian <jtakvori@redhat.com>
parent 45172459
Branches
Tags
No related merge requests found
...@@ -78,13 +78,73 @@ func (r *Registerer) Subscribe(ctx context.Context) (<-chan Event, error) { ...@@ -78,13 +78,73 @@ func (r *Registerer) Subscribe(ctx context.Context) (<-chan Event, error) {
return out, nil return out, nil
} }
// IfaceNameForIndex gets the interface name given an index as recorded by the underlying // IfaceNameForIndexAndMAC returns the interface name for a given
// interfaces' informer. It backs up into the net.InterfaceByIndex function if the interface // interface index and MAC address, using the cached data from prior
// has not been previously registered // events observed by the underlying interfaces informer. It returns
// the interface name and true if a match is found, or an empty string
// and false if not.
//
// If multiple MACs are associated with the same index, this function
// attempts to disambiguate using the provided MAC. If no exact match
// is found, it applies heuristics (e.g., preferred MAC prefix rules)
// to choose a preferred interface name. As a last resort, if no MAC
// match is possible, it returns the first name associated with the
// index to avoid falling back to a syscall.
//
// A fallback to net.InterfaceByIndex is performed only if the index
// is not present in the cache, or the MAC does not match any known
// entry and no heuristic rule applies.
//
// Concurrency note:
//
// Without double-checked locking, the following sequence may occur:
//
// 1. Goroutine A acquires RLock, sees r.ifaces[idx][mac] is missing
// 2. Goroutine B does the same (also sees entry missing)
// 3. Both release RLock and call net.InterfaceByIndex(idx)
// 4. Both prepare to insert iface.Name into r.ifaces[idx][mac]
// 5. Goroutine A acquires Lock and writes to the map
// 6. Goroutine B acquires Lock and overwrites A's result
//
// This results in a lost update. To prevent this, the function uses
// double-checked locking: it re-checks under Lock before inserting,
// ensuring that only one goroutine updates the cache.
func (r *Registerer) IfaceNameForIndexAndMAC(idx int, mac [6]uint8) (string, bool) { func (r *Registerer) IfaceNameForIndexAndMAC(idx int, mac [6]uint8) (string, bool) {
if name, found := r.ifaceCacheLookup(idx, mac); found {
return name, found
}
// Fallback if not found, interfaces lookup
iface, err := net.InterfaceByIndex(idx)
if err != nil {
return "", false
}
foundMAC, err := macToFixed6(iface.HardwareAddr)
if err != nil {
return "", false
}
r.m.Lock()
defer r.m.Unlock()
if current, ok := r.ifaces[idx]; ok {
if existing, exists := current[foundMAC]; exists {
// The entry was populated concurrently during
// our fallback path. Respect the existing
// value to avoid a lost update.
return existing, true
}
current[foundMAC] = iface.Name
} else {
r.ifaces[idx] = map[[6]uint8]string{foundMAC: iface.Name}
}
return iface.Name, true
}
func (r *Registerer) ifaceCacheLookup(idx int, mac [6]uint8) (string, bool) {
r.m.RLock() r.m.RLock()
defer r.m.RUnlock()
macsMap, ok := r.ifaces[idx] macsMap, ok := r.ifaces[idx]
r.m.RUnlock()
if ok { if ok {
if len(macsMap) == 1 { if len(macsMap) == 1 {
// No risk of collision, just return entry without checking for MAC // No risk of collision, just return entry without checking for MAC
...@@ -112,23 +172,7 @@ func (r *Registerer) IfaceNameForIndexAndMAC(idx int, mac [6]uint8) (string, boo ...@@ -112,23 +172,7 @@ func (r *Registerer) IfaceNameForIndexAndMAC(idx int, mac [6]uint8) (string, boo
} }
} }
} }
// Fallback if not found, interfaces lookup return "", false
iface, err := net.InterfaceByIndex(idx)
if err != nil {
return "", false
}
foundMAC, err := macToFixed6(iface.HardwareAddr)
if err != nil {
return "", false
}
r.m.Lock()
if current, ok := r.ifaces[idx]; ok {
current[foundMAC] = iface.Name
} else {
r.ifaces[idx] = map[[6]uint8]string{foundMAC: iface.Name}
}
r.m.Unlock()
return iface.Name, true
} }
type preferredInterface struct { type preferredInterface struct {
......
...@@ -2,6 +2,7 @@ package ifaces ...@@ -2,6 +2,7 @@ package ifaces
import ( import (
"context" "context"
"sync"
"testing" "testing"
"github.com/netobserv/netobserv-ebpf-agent/pkg/config" "github.com/netobserv/netobserv-ebpf-agent/pkg/config"
...@@ -142,6 +143,63 @@ func TestRegisterer_Lookup(t *testing.T) { ...@@ -142,6 +143,63 @@ func TestRegisterer_Lookup(t *testing.T) {
assert.False(t, ok) assert.False(t, ok)
} }
func TestRegisterer_LookupRace(t *testing.T) {
// No assertion here, purpose being to catch races
ctx, cancel := context.WithCancel(context.Background())
defer cancel()
watcher := NewWatcher(10)
registry, err := NewRegisterer(watcher, &config.Agent{BuffersLength: 10})
require.NoError(t, err)
// Start with empty interfaces
watcher.interfaces = func(_ netns.NsHandle, _ string) ([]Interface, error) {
return []Interface{}, nil
}
inputLinks := make(chan netlink.LinkUpdate, 10)
watcher.linkSubscriberAt = func(_ netns.NsHandle, ch chan<- netlink.LinkUpdate, _ <-chan struct{}) error {
go func() {
for link := range inputLinks {
ch <- link
}
}()
return nil
}
// Process & consume interface events
eventsCh, err := registry.Subscribe(ctx)
require.NoError(t, err)
go func() {
for {
<-eventsCh
}
}()
wg := sync.WaitGroup{}
for i := 0; i < 10; i++ {
wg.Add(1)
go func() {
defer wg.Done()
_, _ = registry.IfaceNameForIndexAndMAC(1, macFoo)
_, _ = registry.IfaceNameForIndexAndMAC(2, macBar)
_, _ = registry.IfaceNameForIndexAndMAC(3, macBaz)
_, _ = registry.IfaceNameForIndexAndMAC(3, macBae)
}()
wg.Add(1)
go func() {
defer wg.Done()
inputLinks <- upAndRunning("bae", 3, macBae[:], netns.None())
inputLinks <- upAndRunning("bar", 2, macBar[:], netns.None())
inputLinks <- down("bae", 3, macBae[:], netns.None())
inputLinks <- down("bar", 2, macBar[:], netns.None())
}()
}
wg.Wait()
}
func TestRegisterer_PreferredInterfacesEdgeCases(t *testing.T) { func TestRegisterer_PreferredInterfacesEdgeCases(t *testing.T) {
pref, err := newPreferredInterfaces("") pref, err := newPreferredInterfaces("")
require.NoError(t, err) require.NoError(t, err)
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment