Again, the semi-fixed code fragment is as follows:
/*
 * Semi-fixed RCU example: an RCU-protected list plus a one-element
 * "remember the last hit" cache.  This listing deliberately retains
 * the bug that the surrounding text goes on to explain, so its logic
 * must be read together with that discussion.
 */
 1 struct foo {
 2   struct list_head list;
 3   int key;
 4   int data;
 5 };
 6
 7 LIST_HEAD(mylist);              /* RCU-protected list of struct foo */
 8 DEFINE_SPINLOCK(mylock);        /* serializes updaters (insert/delete) only */
 9 struct foo *cache;              /* last element located by search() */
10
11 int search(int key, int *data)
12 {
13   struct foo *p;
14
15   rcu_read_lock();
16   p = rcu_dereference(cache);   /* try the single-element cache first */
17   if (p != NULL && p->key == key)
18     goto found;
19   list_for_each_entry_rcu(p, &mylist, list)
20     if (p->key == key) {
21       rcu_assign_pointer(cache, p); /* remember this hit for next time */
22       goto found;
23     }
24   rcu_read_unlock();
25   return -ENOENT;
26 found:
27   *data = p->data;
28   rcu_read_unlock();
29   return 0;
30 }
31
32 int insert(int key, int data)
33 {
34   struct foo *p = kmalloc(sizeof(*p), GFP_KERNEL);
35
36   if (p == NULL)
37     return -ENOMEM;
38   p->key = key;
39   p->data = data;
40   spin_lock(&mylock);
41   list_add_rcu(&p->list, &mylist);
42   spin_unlock(&mylock);
43 }  /* NOTE(review): declared int but falls off the end without returning a
      value -- presumably should end with "return 0;"; confirm against the
      original figure */
44
45 int delete(int key)
46 {
47   struct foo *p;
48
49   spin_lock(&mylock);
50   list_for_each_entry(p, &mylist, list)
51     if (p->key == key) {
52       list_del_rcu(&p->list);
53       RCU_INIT_POINTER(cache, NULL); /* clear cache -- but, as the text
                                          explains, this happens too early */
54       spin_unlock(&mylock);
55       synchronize_rcu();        /* wait for pre-existing readers */
56       free(p);                  /* NOTE(review): kernel code pairing
                                     kmalloc() would normally use kfree() */
57       return 0;
58     }
59   spin_unlock(&mylock);
60   return -ENOENT;
61 }
Can you spot the other bug?
To see it, consider the following sequence of events:
CPU 0 invokes search(), and locates
the desired data element, but is preempted (or otherwise delayed)
just before executing line 21.
CPU 1 then invokes delete(), and deletes the
data element that CPU 0 just located.
It clears out the cache (line 53) and then
starts a grace period on line 55, which cannot
complete until CPU 0 leaves its RCU read-side critical
section.
So far, so good.
But CPU 0 now resumes, executing line 21 and thus installing
a pointer to the just-deleted element back into the cache,
then completes its RCU read-side critical section.
The grace period can therefore end, and CPU 1 frees the element.
Finally, some CPU again invokes search(), and accesses
the cached (but now freed!) element on line 17.
Arbitrarily bad things happen after this use-after-free access.
One fix is as follows:
/*
 * Fixed delete(): the element may be reachable both via the list and
 * via the cache, so two grace periods are needed -- one to flush out
 * readers that found it in the list (and might still install it into
 * the cache), and a second, after clearing the cache, to flush out
 * readers that picked it up from the cache.
 */
 1 int delete(int key)
 2 {
 3   struct foo *p;
 4
 5   spin_lock(&mylock);
 6   list_for_each_entry(p, &mylist, list)
 7     if (p->key == key) {
 8       list_del_rcu(&p->list);
 9       spin_unlock(&mylock);
10       synchronize_rcu();        /* wait for readers that found the element
                                     in the list; any cache installs by them
                                     have now happened */
11       RCU_INIT_POINTER(cache, NULL); /* safe to clear the cache: no reader
                                          can re-install the element */
12       synchronize_rcu();        /* wait for readers using the cached
                                     pointer to finish */
13       free(p);                  /* no remaining references -- safe to free */
14       return 0;
15     }
16   spin_unlock(&mylock);
17   return -ENOENT;
18 }
Line 10 waits for all readers who might be accessing the newly removed data element to complete (each of which might install the item into the cache), line 11 clears the cache, and then line 12 waits for all readers accessing the element from the cache to get done. It is then safe to free the element.
The moral of this story is that you need to be extremely careful about handling all potential pointers to the newly deleted element. Another possible moral is that the day of the old cache-the-last-element trick might well be well and truly over.