Can You Find the RCU-Usage Bug?

Again, the code fragment is as follows:

struct foo {
	struct list_head list;
	int key;
	int data;
};

LIST_HEAD(mylist);
DEFINE_SPINLOCK(mylock);
struct foo *cache;

int search(int key, int *data)
{
	struct foo *p;

	rcu_read_lock();
	p = rcu_dereference(cache);
	if (p != NULL && p->key == key)
		goto found;
	list_for_each_entry_rcu(p, &mylist, list)
		if (p->key == key) {
			rcu_assign_pointer(cache, p);
			goto found;
		}
	rcu_read_unlock();
	return -ENOENT;
found:
	*data = p->data;
	rcu_read_unlock();
	return 0;
}

int insert(int key, int data)
{
	struct foo *p = kmalloc(sizeof(*p), GFP_KERNEL);

	if (p == NULL)
		return -ENOMEM;
	p->key = key;
	p->data = data;
	spin_lock(&mylock);
	list_add_rcu(&p->list, &mylist);
	spin_unlock(&mylock);
}

int delete(int key)
{
	struct foo *p;

	spin_lock(&mylock);
	list_for_each_entry(p, &mylist, list)
		if (p->key == key) {
			list_del_rcu(&p->list);
			spin_unlock(&mylock);
			synchronize_rcu();
			kfree(p);
			return 0;
		}
	spin_unlock(&mylock);
	return -ENOENT;
}

So, can you spot the bug, which appeared in my first-ever attempt to use RCU back in the early 1990s? (A similar bug in the Linux kernel has recently been fixed, however, as noted in the comments, there are some important differences. Remember them, and some future posting will be quite easy for you to solve.)

The problem is that the delete() function fails to check the cache pointer. This means that after delete() returns, cache might point into the freelist, which could cause immense trouble for a future call to search().

One fix is as follows:

int delete(int key)
{
	struct foo *p;

	spin_lock(&mylock);
	list_for_each_entry(p, &mylist, list)
		if (p->key == key) {
			list_del_rcu(&p->list);
			rcu_assign_pointer(cache, NULL);
			spin_unlock(&mylock);
			synchronize_rcu();
			free(p);
			return 0;
		}
	spin_unlock(&mylock);
	return -ENOENT;
}

You could of course check to see if the cache pointer actually points to the element being deleted, but if the difference matters a lot, other adjustments are probably in order.

Another question is how to catch bugs of this sort. This is effectively a use-after-free sort of bug, so one approach is to mark the object invalid once the grace period has finished, adding an ->invalid field to the structure for this purpose. The delete() function might then appear as follows:

int delete(int key)
{
	struct foo *p;

	spin_lock(&mylock);
	list_for_each_entry(p, &mylist, list)
		if (p->key == key) {
			list_del_rcu(&p->list);
			rcu_assign_pointer(cache, NULL);
			spin_unlock(&mylock);
			synchronize_rcu();
			p->invalid = 1;
			schedule_timeout_uninterruptible(10);
			free(p);
			return 0;
		}
	spin_unlock(&mylock);
	return -ENOENT;
}

A check for p->invalid could then be added to the search() function. Needless to say, adding these sorts of checks does require a certain level of paranoia. However, a little paranoia can be a very good thing when writing software, whether sequential or parallel!