c - 内核块设备 - 使用自旋锁陷入死锁



我刚刚实现了一个虚拟块设备,我想使用它来探索linux内核如何处理块设备。

我的设备只是一个内存区域,分为两个 512 字节扇区。

我正在使用全局结构来存储设备信息:

typedef struct
{
uint32_t hard_sector_size;        // Size of a device sector
uint32_t sector_number;           // Number of sector on device
uint32_t size;                    // Total size of virtual device in bytes
uint8_t* data;                    // Device memory buffer
spinlock_t device_lock;           // Device structure access spinlock
struct request_queue *queue;      // Device request queue
struct gendisk *gendisk;          // Device "disk" representation
int major;                        // Device major number attributed by kernel
int minor;                        // Device minor number fixed at initialization
uint32_t r_users;                 // Number of read access
uint32_t w_users;                 // Number of write access
}blk_mod_t;
blk_mod_t self;
[...]

现在我想保护此结构免受并发访问。为此,我正在使用device_lock字段。
如果锁定被占用,则结构正在更新,所以我应该等到完成。
如果不采用,我可以访问结构字段。

现在我只在以下三个函数中使用此旋转锁

static int block_mod_open(struct block_device *bdev, fmode_t mode)
{
access_mode_t access_mode;
DEBUG("Entering open functionn");
if((mode & FMODE_READ) && (mode & FMODE_WRITE))
{
NOTICE("Oppened in read/write moden");
mode = ACCESS_RW;
}
else if(mode & FMODE_READ)
{
NOTICE("Oppened in read only moden");
mode = ACCESS_RONLY;
}
else if(mode & FMODE_WRITE)
{
NOTICE("Oppened in write only moden");
mode = ACCESS_WONLY;
}
DEBUG("<--n");
spin_lock(&self.device_lock);
if(ACCESS_RW == access_mode)
{
self.r_users++;
self.w_users++;
}
else if(ACCESS_RONLY == access_mode)
{
self.r_users++;
}
else
{
self.w_users++;
}
NOTICE("Read access: %dtWrite access: %dn", self.r_users, self.w_users);
DEBUG("-->n");
spin_unlock(&self.device_lock);
DEBUG("Exiting open functionn");
return 0;
}
static void block_mod_release(struct gendisk *disk, fmode_t mode)
{
access_mode_t access_mode;
DEBUG("Entering release functionn");
if((mode & FMODE_READ) && (mode & FMODE_WRITE))
{
NOTICE("Closed read/write moden");
mode = ACCESS_RW;
}
else if(mode & FMODE_READ)
{
NOTICE("Closed read only moden");
mode = ACCESS_RONLY;
}
else if(mode & FMODE_WRITE)
{
NOTICE("Closed write only moden");
mode = ACCESS_WONLY;
}
DEBUG("<--n");
spin_lock(&self.device_lock);
if(ACCESS_RW == access_mode)
{
self.r_users--;
self.w_users--;
}
else if(ACCESS_RONLY == access_mode)
{
self.r_users--;
}
else
{
self.w_users--;
}
NOTICE("Read access: %dtWrite access: %dn", self.r_users, self.w_users);
DEBUG("-->n");
spin_unlock(&self.device_lock);
DEBUG("Exiting release functionn");
return;
}
static void block_mod_transfer(unsigned long sector, unsigned long nsect, char *buffer, int write)
{
unsigned long offset = sector*KERNEL_SECTOR_SIZE;
unsigned long nbytes = nsect*KERNEL_SECTOR_SIZE;
DEBUG("Entering transfer functionn");
DEBUG("<--n");
spin_lock(&self.device_lock);
if((offset + nbytes) > self.size) {
WARNING("Beyond-end write (%ld %ld)n", offset, nbytes);
spin_unlock(&self.device_lock);
return;
}
if(write)
{
NOTICE("Writing to devicen");
memcpy(self.data + offset, buffer, nbytes);
}
else
{
NOTICE("Reading from devicen");
memcpy(buffer, self.data + offset, nbytes);
}
DEBUG("-->n");
spin_unlock(&self.device_lock);
DEBUG("Exiting transfer functionn");
}

我正在使用以下函数处理请求

static void block_mod_request(struct request_queue *queue)
{
DEBUG("Entering request functionn");
struct request *request;
while(NULL != (request = blk_fetch_request(queue)))
{
blk_mod_t *self = request->rq_disk->private_data;
// Check if request is a filesystem request (i.e. moves block of data)
if(REQ_TYPE_FS != request->cmd_type)
{
// Close request with unsuccessful status
WARNING("Skip non-fs requestn");
__blk_end_request_cur(request, -EIO);
continue;
}
// Treat request
block_mod_transfer(blk_rq_pos(request), blk_rq_cur_sectors(request), bio_data(request->bio), rq_data_dir(request));
// Close request with successful status
__blk_end_request_cur(request, 0);
}
DEBUG("Exiting request functionn");
return;
}

当我加载模块时,没有任何异常发生。但是,如果我尝试从中读取,我会陷入死锁,因为我的系统不再回答,我必须重新启动。

这是输出:

root@PC325:~# echo 8 > /proc/sys/kernel/printk
root@PC325:~# insmod block_mod.ko 
[   64.546791] block_mod: loading out-of-tree module taints kernel.
[   64.548197] block_mod: module license '(c) Test license' taints kernel.
[   64.549951] Disabling lock debugging due to kernel taint
[   64.552816] Inserting module 'blk_mod_test'
[   64.554085] Got major number : '254'
[   64.554940] Data allocated (size = 1024)
[   64.557378] Request queue initialized
[   64.558178] Sent hard sector size to request queue
[   64.559188] Gendisk allocated
[   64.559817] Gendisk filled
[   64.560416] Gendisk capacity set
[   64.563285] Gendisk added
root@PC325:~# [   64.565280] Entering open function
[   64.566035] Oppened in read only mode
[   64.566773] <--
[   64.567138] Read access: 1   Write access: 0
[   64.567977] -->
[   64.568342] Exiting open function
[   64.571080] Entering release function
[   64.571855] Closed read only mode
[   64.572531] <--
[   64.572924] Read access: 0   Write access: 0
[   64.573749] -->
[   64.574116] Exiting release function
root@PC325:~# cat /dev/blkmodtest 
[   78.488228] Entering open function
[   78.488988] Oppened in read only mode
[   78.489733] <--
[   78.490100] Read access: 1   Write access: 0
[   78.490925] -->
[   78.491290] Exiting open function
[   78.492026] Entering request function
[   78.492743] Entering transfer function
[   78.493469] <--
-------------- DEADLOCK HERE --------------

更新:添加初始化和退出函数

static int __init block_mod_init(void)
{
char* message = "abcdefghijklmnopqrstuvwxyz";
int i;
INFO("Inserting module '%s'n", MODULE_NAME);
// Initialize driver data structure
memset(&self, 0, sizeof(blk_mod_t));
self.hard_sector_size = DEVICE_HARD_SECTOR_SIZE;
self.sector_number = DEVICE_SECTOR_NUMBER;
self.size = self.sector_number*self.hard_sector_size;
self.minor = 1;
// Get a major number from kernel
if(0 > (self.major = register_blkdev(self.major, MODULE_NAME)))
{
ERROR("Unable to get major number for '%s'n", MODULE_NAME);
unregister_blkdev(self.major, MODULE_NAME);
return -1;
}
DEBUG("Got major number : '%d'n", self.major);
// Allocate data space
if(NULL == (self.data = vmalloc(self.size)))
{
ERROR("Unable to allocate memory for '%s'n", MODULE_NAME);
unregister_blkdev(self.major, MODULE_NAME);
return -2;
}
for(i=0;i<self.size;i++)
{
self.data[i] = message[i%strlen(message)];
}
spin_lock_init(&self.device_lock);
DEBUG("Data allocated (size = %d)n", self.size);
// Allocate the request queue
if(NULL == (self.queue = blk_init_queue(block_mod_request, &self.device_lock)))
{
ERROR("Unable to initialize request queue for '%s'n", MODULE_NAME);
vfree(self.data);
unregister_blkdev(self.major, MODULE_NAME);
return -3;
}
DEBUG("Request queue initializedn");
// Send device hard sector size to request queue
blk_queue_logical_block_size(self.queue, self.hard_sector_size);
self.queue->queuedata = &self;
DEBUG("Sent hard sector size to request queuen");
// Allocate the gendisk structure
if(NULL == (self.gendisk = alloc_disk(self.minor)))
{
ERROR("Unable to initialize gendisk for '%s'n", MODULE_NAME);
blk_cleanup_queue(self.queue);
vfree(self.data);
unregister_blkdev(self.major, MODULE_NAME);
return -4;
}
DEBUG("Gendisk allocatedn");
// Fill gendisk structure
self.gendisk->major = self.major;
self.gendisk->first_minor = self.minor;
self.gendisk->fops = &self_ops;
self.gendisk->queue = self.queue;
self.gendisk->private_data = &self;
snprintf(self.gendisk->disk_name, 32, "blkmodtest");
DEBUG("Gendisk filledn");
set_capacity(self.gendisk, self.sector_number*(self.hard_sector_size/KERNEL_SECTOR_SIZE));
DEBUG("Gendisk capacity setn");
add_disk(self.gendisk);
DEBUG("Gendisk addedn");
return 0;
}
static void __exit block_mod_cleanup(void)
{
del_gendisk(self.gendisk);
put_disk(self.gendisk);
blk_cleanup_queue(self.queue);
vfree(self.data);
unregister_blkdev(self.major, MODULE_NAME);
INFO("Removing module '%s'n", MODULE_NAME);
return;
}

更新:添加宏和枚举定义

#define MODULE_NAME                    "blk_mod_test"
#define KERNEL_SECTOR_SIZE             512
#define DEVICE_HARD_SECTOR_SIZE        512
#define DEVICE_SECTOR_NUMBER           2
typedef enum
{
ACCESS_RONLY = 0,
ACCESS_WONLY = 1,
ACCESS_RW    = 2,
}access_mode_t;

我不明白的是,在我尝试将其锁定到block_mod_transfer之前,旋转锁已被释放(在block_mod_open年底)。

所以我不明白为什么当旋转锁似乎可以采用时内核陷入僵局。

为什么在这种情况下我陷入僵局?我做错了什么?

多亏了@CraigEstey的评论,我终于发现问题来自这样一个事实,即请求队列是使用与我的设备结构相同的自旋锁初始化的。

// Allocate the request queue
if(NULL == (self.queue = blk_init_queue(block_mod_request, &self.device_lock)))
{
ERROR("Unable to initialize request queue for '%s'n", MODULE_NAME);
vfree(self.data);
unregister_blkdev(self.major, MODULE_NAME);
return -3;
}

因此,当请求队列的回调函数(即block_mod_request)被调用,旋转锁已经持有,我陷入了死锁。

相关内容

  • 没有找到相关文章

最新更新