一、引言
所谓死锁,是指多个线程或进程各自持有某些资源,同时又等待着别的线程或进程释放它们现在所保持的资源,否则就不能向前推进。如下图:线程各自占有一把锁,还需要申请别的线程当前持有的锁,形成锁资源的循环等待,这就是死锁。
从上图中,我们可以看到,死锁一定伴随着某种循环的资源依赖,也就是形成了闭环。所谓死锁检测,就是检测这种环是否存在:只要能检测出环,就能检测到死锁。
二、hook技术
Hook技术又叫做钩子函数,原理是在系统没有调用该函数之前,钩子程序就先捕获该消息,并得到控制权。这时我们就可以在钩子函数中执行自己的代码片段。底层原理实际上是利用动态加载的过程中替换原有函数符号的地址,来执行我们自定义的钩子函数。
dlsym函数
void *dlsym(void *handle, const char *symbol);
dlsym()函数是动态加载的核心函数,其中,第二个参数就是所要查找的符号。如果找到该符号,返回该符号的值;没有找到返回NULL。它的返回值对于不同类型的符号,意义是不同的。如果查找的是函数,返回的就是函数地址;如果是变量,返回的是变量的地址。
以下为钩子函数的示例:
#define _GNU_SOURCE
#include <dlfcn.h>typedef int (*pthread_mutex_lock_t)(pthread_mutex_t *mutex);
pthread_mutex_lock_t pthread_mutex_lock_f;typedef int (*pthread_mutex_unlock_t)(pthread_mutex_t *mutex);
pthread_mutex_unlock_t pthread_mutex_unlock_f;static int init_hook() {// 使用dlsym函数获取系统pthread_mutex_lock和pthread_mutex_unlock的地址pthread_mutex_lock_f = dlsym(RTLD_NEXT, "pthread_mutex_lock");pthread_mutex_unlock_f = dlsym(RTLD_NEXT, "pthread_mutex_unlock");
}int pthread_mutex_lock(pthread_mutex_t *mutex) {// TODO whatever you wantpthread_mutex_lock_f(mutex); // 执行系统真正的pthread_mutex_lock函数// TODO whatever you want
}int pthread_mutex_unlock(pthread_mutex_t *mutex) {// TODO whatever you wantpthread_mutex_unlock_f(mutex); // 执行系统真正的pthread_mutex_unlock函数// TODO whatever you want
}
三、死锁检测原理
这种资源的依赖关系,可以使用数据结构有向图来构建,如果对图不了解的同学戳这里。如线程A想要获取线程B已占有的资源,则建立一条A指向B的关系。有向图可以参考数据结构——图详解及代码实现。
四、死锁检测完整代码实现
#define _GNU_SOURCE
#include <dlfcn.h>#include <stdio.h>
#include <pthread.h>
#include <unistd.h>#include <stdlib.h>
#include <stdint.h>#define THREAD_NUM 10typedef unsigned long int uint64;typedef int (*pthread_mutex_lock_t)(pthread_mutex_t *mutex);
pthread_mutex_lock_t pthread_mutex_lock_f;typedef int (*pthread_mutex_unlock_t)(pthread_mutex_t *mutex);
pthread_mutex_unlock_t pthread_mutex_unlock_f;#if 1 // graph#define MAX 100enum Type {PROCESS, RESOURCE};struct source_type {uint64 id;enum Type type;uint64 lock_id;int degress;
};struct vertex {struct source_type s;struct vertex *next;
};struct task_graph {struct vertex list[MAX];int num;struct source_type locklist[MAX];int lockidx;pthread_mutex_t mutex;
};struct task_graph *tg = NULL;
int path[MAX+1];
int visited[MAX];
int k = 0;
int deadlock = 0;struct vertex *create_vertex(struct source_type type) {struct vertex *tex = (struct vertex *)malloc(sizeof(struct vertex ));tex->s = type;tex->next = NULL;return tex;
}int search_vertex(struct source_type type) {int i = 0;for (i = 0;i < tg->num;i ++) {if (tg->list[i].s.type == type.type && tg->list[i].s.id == type.id) {return i;}}return -1;
}void add_vertex(struct source_type type) {if (search_vertex(type) == -1) {tg->list[tg->num].s = type;tg->list[tg->num].next = NULL;tg->num ++;}
}int add_edge(struct source_type from, struct source_type to) {add_vertex(from);add_vertex(to);struct vertex *v = &(tg->list[search_vertex(from)]);while (v->next != NULL) {v = v->next;}v->next = create_vertex(to);
}int verify_edge(struct source_type i, struct source_type j) {if (tg->num == 0) return 0;int idx = search_vertex(i);if (idx == -1) {return 0;}struct vertex *v = &(tg->list[idx]);while (v != NULL) {if (v->s.id == j.id) return 1;v = v->next;}return 0;
}int remove_edge(struct source_type from, struct source_type to) {int idxi = search_vertex(from);int idxj = search_vertex(to);if (idxi != -1 && idxj != -1) {struct vertex *v = &tg->list[idxi];struct vertex *remove;while (v->next != NULL) {if (v->next->s.id == to.id) {remove = v->next;v->next = v->next->next;free(remove);break;}v = v->next;}}
}void print_deadlock(void) {int i = 0;printf("deadlock : ");for (i = 0;i < k-1;i ++) {printf("%ld --> ", tg->list[path[i]].s.id);}printf("%ld\n", tg->list[path[i]].s.id);
}int DFS(int idx) {struct vertex *ver = &tg->list[idx];if (visited[idx] == 1) {path[k++] = idx;print_deadlock();deadlock = 1;return 0;}visited[idx] = 1;path[k++] = idx;while (ver->next != NULL) {DFS(search_vertex(ver->next->s));k --;ver = ver->next;}return 1;
}int search_for_cycle(int idx) {struct vertex *ver = &tg->list[idx];visited[idx] = 1;k = 0;path[k++] = idx;while (ver->next != NULL) {int i = 0;for (i = 0;i < tg->num;i ++) {if (i == idx) continue;visited[i] = 0;}for (i = 1;i <= MAX;i ++) {path[i] = -1;}k = 1;DFS(search_vertex(ver->next->s));ver = ver->next;}
}#if 0
int main() {tg = (struct task_graph*)malloc(sizeof(struct task_graph));tg->num = 0;struct source_type v1;v1.id = 1;v1.type = PROCESS;add_vertex(v1);struct source_type v2;v2.id = 2;v2.type = PROCESS;add_vertex(v2);struct source_type v3;v3.id = 3;v3.type = PROCESS;add_vertex(v3);struct source_type v4;v4.id = 4;v4.type = PROCESS;add_vertex(v4);struct source_type v5;v5.id = 5;v5.type = PROCESS;add_vertex(v5);add_edge(v1, v2);add_edge(v2, v3);add_edge(v3, v4);add_edge(v4, v5);add_edge(v3, v1);search_for_cycle(search_vertex(v1));}
#endif#endifvoid check_dead_lock(void) {int i = 0;deadlock = 0;for (i = 0;i < tg->num;i ++) {if (deadlock == 1) break;search_for_cycle(i);}if (deadlock == 0) {printf("no deadlock\n");}}static void *thread_routine(void *args) {while (1) {sleep(2);check_dead_lock();}
}void start_check(void) {tg = (struct task_graph*)malloc(sizeof(struct task_graph));tg->num = 0;tg->lockidx = 0;pthread_t tid;pthread_create(&tid, NULL, thread_routine, NULL);
}#if 1int search_lock(uint64 lock) {int i = 0;for (i = 0;i < tg->lockidx;i ++) {if (tg->locklist[i].lock_id == lock) {return i;}}return -1;
}int search_empty_lock(uint64 lock) {int i = 0;for (i = 0;i < tg->lockidx;i ++) {if (tg->locklist[i].lock_id == 0) {return i;}}return tg->lockidx;
}#endifint inc(int *value, int add) {int old;__asm__ volatile("lock;xaddl %2, %1;": "=a"(old): "m"(*value), "a" (add): "cc", "memory");return old;
}void print_locklist(void) {int i = 0;printf("print_locklist: \n");printf("---------------------\n");for (i = 0;i < tg->lockidx;i ++) {printf("threadid : %ld, lockid: %ld\n", tg->locklist[i].id, tg->locklist[i].lock_id);}printf("---------------------\n\n\n");
}void lock_before(uint64 thread_id, uint64 lockaddr) {int idx = 0;// list<threadid, toThreadid>for(idx = 0;idx < tg->lockidx;idx ++) {if ((tg->locklist[idx].lock_id == lockaddr)) {struct source_type from;from.id = thread_id;from.type = PROCESS;add_vertex(from);struct source_type to;to.id = tg->locklist[idx].id;tg->locklist[idx].degress++;to.type = PROCESS;add_vertex(to);if (!verify_edge(from, to)) {add_edge(from, to); // }}}
}void lock_after(uint64 thread_id, uint64 lockaddr) {int idx = 0;if (-1 == (idx = search_lock(lockaddr))) { // lock list opera int eidx = search_empty_lock(lockaddr);tg->locklist[eidx].id = thread_id;tg->locklist[eidx].lock_id = lockaddr;inc(&tg->lockidx, 1);} else {struct source_type from;from.id = thread_id;from.type = PROCESS;struct source_type to;to.id = tg->locklist[idx].id;tg->locklist[idx].degress --;to.type = PROCESS;if (verify_edge(from, to))remove_edge(from, to);tg->locklist[idx].id = thread_id;}
}void unlock_after(uint64 thread_id, uint64 lockaddr) {int idx = search_lock(lockaddr);if (tg->locklist[idx].degress == 0) {tg->locklist[idx].id = 0;tg->locklist[idx].lock_id = 0;//inc(&tg->lockidx, -1);}
}int pthread_mutex_lock(pthread_mutex_t *mutex) {pthread_t selfid = pthread_self(); //lock_before(selfid, (uint64)mutex);pthread_mutex_lock_f(mutex);lock_after(selfid, (uint64)mutex);}int pthread_mutex_unlock(pthread_mutex_t *mutex) {pthread_t selfid = pthread_self();pthread_mutex_unlock_f(mutex);unlock_after(selfid, (uint64)mutex);
}static int init_hook() {pthread_mutex_lock_f = dlsym(RTLD_NEXT, "pthread_mutex_lock");pthread_mutex_unlock_f = dlsym(RTLD_NEXT, "pthread_mutex_unlock");
}#if 1pthread_mutex_t mutex_1 = PTHREAD_MUTEX_INITIALIZER;
pthread_mutex_t mutex_2 = PTHREAD_MUTEX_INITIALIZER;
pthread_mutex_t mutex_3 = PTHREAD_MUTEX_INITIALIZER;
pthread_mutex_t mutex_4 = PTHREAD_MUTEX_INITIALIZER;void *thread_rountine_1(void *args)
{pthread_t selfid = pthread_self(); //printf("thread_routine 1 : %ld \n", selfid);pthread_mutex_lock(&mutex_1);sleep(1);pthread_mutex_lock(&mutex_2);pthread_mutex_unlock(&mutex_2);pthread_mutex_unlock(&mutex_1);return (void *)(0);
}void *thread_rountine_2(void *args)
{pthread_t selfid = pthread_self(); //printf("thread_routine 2 : %ld \n", selfid);pthread_mutex_lock(&mutex_2);sleep(1);pthread_mutex_lock(&mutex_3);pthread_mutex_unlock(&mutex_3);pthread_mutex_unlock(&mutex_2);return (void *)(0);
}void *thread_rountine_3(void *args)
{pthread_t selfid = pthread_self(); //printf("thread_routine 3 : %ld \n", selfid);pthread_mutex_lock(&mutex_3);sleep(1);pthread_mutex_lock(&mutex_4);pthread_mutex_unlock(&mutex_4);pthread_mutex_unlock(&mutex_3);return (void *)(0);
}void *thread_rountine_4(void *args)
{pthread_t selfid = pthread_self(); //printf("thread_routine 4 : %ld \n", selfid);pthread_mutex_lock(&mutex_4);sleep(1);pthread_mutex_lock(&mutex_1);pthread_mutex_unlock(&mutex_1);pthread_mutex_unlock(&mutex_4);return (void *)(0);
}int main()
{init_hook();start_check();printf("start_check\n");pthread_t tid1, tid2, tid3, tid4;pthread_create(&tid1, NULL, thread_rountine_1, NULL);pthread_create(&tid2, NULL, thread_rountine_2, NULL);pthread_create(&tid3, NULL, thread_rountine_3, NULL);pthread_create(&tid4, NULL, thread_rountine_4, NULL);pthread_join(tid1, NULL);pthread_join(tid2, NULL);pthread_join(tid3, NULL);pthread_join(tid4, NULL);return 0;
}#endif
运行结果:
可以看到检测到了死锁,线程1->2->3->4->1.
文章参考于<零声教育>的C/C++linux服务器高级架构