Napisałem test, w większości w asemblerze, żeby mieć większą kontrolę. Do rejestru R15 wstawiłem TID zwrócony przez gettid().
Implementacja CLH-locka:
Kopiuj
# Queue lock for x86-64, GNU as, Intel operand order without register prefixes.
# The routines below take their arguments in rdi/rsi and return in rax
# (System V AMD64 register usage).
.intel_syntax noprefix
# Exported entry points. The lock object is a single 8-byte tail pointer.
.globl queuelock_init
.globl queuelock_lock
.globl queuelock_unlock
.globl queuelock_destroy
# Everything below is placed in the executable, allocatable text section.
.section .text,"ax",@progbits
# ---------------------------------------------------------------------
# queuelock_init(lock)
# In:    rdi = address of the lock (one 8-byte tail pointer)
# Out:   eax = 0
# Marks the queue as empty by storing a NULL tail pointer.
# ---------------------------------------------------------------------
queuelock_init:
        mov     QWORD PTR [rdi], 0      # tail = NULL (empty queue)
        xor     eax, eax                # report success
        ret
# ---------------------------------------------------------------------
# queuelock_destroy(lock)
# In:    rdi = address of the lock
# Out:   eax = 0 when the tail pointer is NULL (nobody queued),
#        eax = -1 when a node is still installed as the tail
# The lock itself is not modified.
# ---------------------------------------------------------------------
queuelock_destroy:
        xor     eax, eax                # assume the queue is empty
        cmp     QWORD PTR [rdi], 0      # tail == NULL ?
        je      .Lqueuelock_destroy_done
        dec     eax                     # 0 - 1 = -1: lock still in use
.Lqueuelock_destroy_done:
        ret
# ---------------------------------------------------------------------
# node = queuelock_lock(lock)
# In:    rdi = address of the lock (8-byte tail pointer)
# Out:   rax = node owned by the caller; pass it to queuelock_unlock.
#        rax = NULL when fake_malloc returned NULL; in that case the
#        lock has NOT been acquired.
# Calls: fake_malloc(8), fake_free(pred)
# Stack: [rbp-8] = lock address, [rbp-16] = node; rsp stays 16-byte
#        aligned at both call sites.
#
# Algorithm:
#   node->locked = 1
#   pred = atomic_exchange(&lock->tail, node)
#   when pred is not NULL: spin until pred->locked == 0, then free(pred)
#
# The flag is written as a full QWORD so that every access to the node
# (this store, the spin below, the clearing store in queuelock_unlock)
# uses the same width and no byte of the 8-byte allocation is read
# while uninitialised.
# ---------------------------------------------------------------------
queuelock_lock:
        push    rbp
        mov     rbp, rsp
        sub     rsp, 16
        mov     QWORD PTR [rbp-8], rdi      # keep the lock address across the call

        mov     edi, 8                      # one 8-byte flag word per node
        call    fake_malloc
        test    rax, rax
        je      .Lqueuelock_lock_done       # allocation failed: return NULL

        mov     QWORD PTR [rax], 1          # node->locked = 1
        mov     QWORD PTR [rbp-16], rax     # remember our node for the return value

        mov     rdi, QWORD PTR [rbp-8]
        xchg    QWORD PTR [rdi], rax        # pred = exchange(tail, node); xchg with memory is atomic
        test    rax, rax
        je      .Lqueuelock_lock_acquired   # queue was empty: lock is ours

.Lqueuelock_lock_spin:
        pause                               # spin-wait hint
        cmp     QWORD PTR [rax], 0          # pred->locked still set?
        jne     .Lqueuelock_lock_spin

        mov     rdi, rax                    # predecessor released: its node is ours to free
        call    fake_free

.Lqueuelock_lock_acquired:
        mov     rax, QWORD PTR [rbp-16]     # return node
.Lqueuelock_lock_done:
        leave
        ret
# ---------------------------------------------------------------------
# queuelock_unlock(lock, node)
# In:    rdi = address of the lock, rsi = node returned by queuelock_lock
# Out:   eax = 0
# Calls: fake_free(node) when nobody queued behind us
#
# Tries to swing the tail from node back to NULL. When that succeeds
# there is no successor and the node is freed here. Otherwise a
# successor has already replaced the tail and is polling node->locked,
# so the flag is cleared and the node is left for that successor
# (queuelock_lock frees its predecessor after the spin).
# ---------------------------------------------------------------------
queuelock_unlock:
        push    rbp
        mov     rbp, rsp

        xor     ecx, ecx                    # replacement value: NULL
        mov     rax, rsi                    # expected value: our node
        lock cmpxchg QWORD PTR [rdi], rcx   # tail == node ? tail = NULL : rax = tail
        je      .Lqueuelock_unlock_free

        mov     QWORD PTR [rsi], 0          # node->locked = 0: hand over to the successor
        jmp     .Lqueuelock_unlock_done

.Lqueuelock_unlock_free:
        mov     rdi, rsi                    # no successor: release the node ourselves
        call    fake_free

.Lqueuelock_unlock_done:
        xor     eax, eax
        pop     rbp
        ret
Tworzenie wątku:
Kopiuj
#pragma once
/*
 * Start a new thread that runs the given function with the given argument.
 * The implementation is hand-written assembly built on the raw clone system
 * call; the value it returns to the caller is the result of that system call
 * (the new thread's id, or a negative value when clone fails).
 * The function's int return value becomes the thread's exit status.
 */
int thread_create(int(*)(void*), void*);
/* Terminate the calling thread with the given status; never returns. */
void thread_exit(int) __attribute__ ((noreturn));
Kopiuj
# Minimal thread creation for x86-64 Linux using raw system calls.
# GNU as, Intel operand order without register prefixes.
.intel_syntax noprefix
# Exported entry points (prototypes are in the C header).
.globl thread_create
.globl thread_exit
.section .text,"ax",@progbits
# ---------------------------------------------------------------------
# int thread_create(int (*fn)(void *), void *arg)
# ABI:   System V AMD64, raw Linux system calls (args rdi, rsi, rdx,
#        r10, r8, r9; number in rax; rcx and r11 are clobbered)
# In:    rdi = fn, rsi = arg
# Out:   in the caller: the value returned by clone (the new thread id),
#        or a negative errno value when prlimit64, mmap or clone fails.
#        In the new thread: does not return; control goes to fn(arg)
#        and, when fn returns, to the trampoline at local label 1
#        (defined right after thread_exit), which exits the thread
#        with the return value of fn.
# Frame: [rbp-8]  = fn
#        [rbp-16] = arg
#        [rbp-32] = struct rlimit (16 bytes), later reused for the
#                   VMA name string
#
# Steps:
#   1. prlimit64(0, RLIMIT_STACK, NULL, &old) - read the stack size limit
#   2. mmap an anonymous private region of rlim_cur bytes
#   3. store arg and fn in the top 16 bytes of that region
#   4. prctl(PR_SET_VMA, PR_SET_VMA_ANON_NAME, ...) - label the mapping
#      "stack"; best effort, the result is deliberately ignored
#   5. clone with the stack pointer set to the top of the region
#
# The results of steps 1 and 2 are checked: an unlimited stack rlimit
# or an mmap failure would otherwise make step 3 write through an
# address computed from a negative errno value.
#
# NOTE(review): the clone flags do not include CLONE_SETTLS, so the new
# thread runs with the same thread-pointer register as its creator.
# libc code that relies on thread-local state (errno, allocator caches,
# stdio lock ownership) is then presumably not thread-safe - confirm
# before calling libc from fn.
# NOTE(review): the stack mapping is never unmapped by this file.
# NOTE(review): the top of the stack is base + rlim_cur; this assumes
# rlim_cur is a multiple of 16 so that fn starts with an ABI-aligned
# stack - confirm.
# ---------------------------------------------------------------------
thread_create:
        push    rbp
        mov     rbp, rsp
        push    rdi                         # [rbp-8]  = fn
        push    rsi                         # [rbp-16] = arg
        sub     rsp, 16                     # [rbp-32] = old_rlim

        # Step 1: prlimit64(0, RLIMIT_STACK, NULL, &old_rlim)
        xor     edi, edi                    # pid = 0 (calling process)
        mov     esi, 3                      # resource = RLIMIT_STACK
        xor     edx, edx                    # new_rlim = NULL
        mov     r10, rsp                    # old_rlim = &[rbp-32]
        mov     eax, 302                    # syscall = prlimit64
        syscall
        test    rax, rax
        js      .Lthread_create_done        # negative errno: give up

        # Step 2: mmap(NULL, rlim_cur, PROT_READ | PROT_WRITE,
        #              MAP_PRIVATE | MAP_ANONYMOUS | MAP_STACK |
        #              MAP_UNINITIALIZED, -1, 0)
        xor     edi, edi                    # addr = NULL
        mov     rsi, QWORD PTR [rbp-32]     # length = old_rlim.rlim_cur
        mov     edx, 3                      # prot = PROT_READ | PROT_WRITE
        mov     r10d, 0x4020022             # flags, see above
        or      r8, -1                      # fd = -1
        xor     r9d, r9d                    # offset = 0
        mov     eax, 9                      # syscall = mmap
        syscall
        cmp     rax, -4095
        jae     .Lthread_create_done        # rax in [-4095, -1]: mmap failed

        # Step 3: seed the new stack. The child pops these two words.
        mov     rdx, rax                    # rdx = stack base (also prctl arg3)
        mov     r10, rsi                    # r10 = stack size (also prctl arg4)
        mov     rax, QWORD PTR [rbp-16]
        mov     QWORD PTR [rdx+rsi-16], rax # top-16 = arg
        mov     rax, QWORD PTR [rbp-8]
        mov     QWORD PTR [rdx+rsi-8], rax  # top-8  = fn

        # Step 4: prctl(PR_SET_VMA, PR_SET_VMA_ANON_NAME, base, size, "stack")
        mov     rax, 0x6B63617473           # "stack" plus NUL padding, little endian
        mov     QWORD PTR [rbp-32], rax     # rlimit slot is free again; reuse it
        mov     r8, rsp                     # arg5 = name (rsp == rbp-32)
        mov     edi, 0x53564d41             # option = PR_SET_VMA
        xor     esi, esi                    # arg2 = PR_SET_VMA_ANON_NAME
        mov     eax, 157                    # syscall = prctl
        syscall                             # result ignored on purpose

        # Step 5: clone(flags, stack_top, ...)
        mov     edi, 0x50F00                # CLONE_VM | CLONE_FS | CLONE_FILES | CLONE_SIGHAND | CLONE_THREAD | CLONE_SYSVSEM
        lea     rsi, [rdx+r10]              # child stack pointer = base + size
        mov     eax, 56                     # syscall = clone
        syscall
        test    rax, rax
        jne     .Lthread_create_done        # parent (tid) or failure (-errno)

        # ---- new thread; rsp = top of the fresh stack ----
        sub     rsp, 16                     # step back over the two seeded words
        pop     rdi                         # rdi = arg
        pop     rdx                         # rdx = fn; rsp is back at the top
        xor     ebp, ebp                    # zero frame pointer for the outermost frame
        lea     rax, [rip+1f]               # return address: exit trampoline after thread_exit
        push    rbp                         # two zero words keep rsp 16-byte aligned
        push    rbp                         # before the return address is pushed
        mov     rbp, rsp
        push    rax                         # fake return address; fn sees rsp % 16 == 8
        jmp     rdx                         # enter fn(arg) as if it had been called

.Lthread_create_done:
        leave                               # drops fn, arg and the scratch slot
        ret
# ---------------------------------------------------------------------
# void thread_exit(int status)            (declared noreturn in C)
# In:    rdi = status
# Issues the exit system call, which ends only the calling thread.
# ---------------------------------------------------------------------
thread_exit:
        mov     eax, 60                     # syscall = exit
        syscall                             # exit(status)

# Return trampoline for threads started by thread_create, which pushes
# the address of this label as the return address of the thread
# function. The label must stay numeric 1 and must stay the first such
# label after thread_create, because thread_create refers to it as 1f.
# On arrival rax holds the value returned by the thread function.
1:
        mov     rdi, rax                    # status = return value of the thread function
        jmp     thread_exit
main() w C:
Kopiuj
#include <unistd.h>
#include <stdio.h>
#include "thread.h"
#include "queuelock.h"
/* Number of worker threads started by main(). */
#define MAX_THREADS 16
/* The queue lock under test; shared by all workers. */
queuelock_t lock;
/* Shared counter; the workers increment it while holding the lock. */
unsigned long int counter = 0;
/*
 * Number of workers that have finished. Each worker bumps it with a
 * LOCK-prefixed increment in assembly. It is a plain (neither atomic nor
 * volatile) object, so any C code reading it needs its own synchronization.
 */
unsigned int finished = 0; /* FIXME: unprotected in C, Assembly uses LOCK prefix */
/* Worker entry point, implemented in assembly. */
extern int f(void*);
/*
 * Test driver: starts MAX_THREADS workers running f(), waits until every
 * worker that was actually started has reported completion through
 * `finished`, then prints the shared counter.
 *
 * Returns 0 when all workers could be started, 1 otherwise.
 */
int main(void)
{
    unsigned int started = 0;
    int i;

    /* Unbuffered stdout so diagnostic lines appear immediately. */
    setvbuf(stdout, NULL, _IONBF, 0);
    queuelock_init(&lock);

    for (i = 0; i < MAX_THREADS; ++i) {
        /*
         * thread_create() hands back the result of the clone system call:
         * a positive thread id on success, a negative value on failure.
         * Only wait for threads that really exist, otherwise the loop
         * below would never terminate.
         */
        if (thread_create(f, NULL) > 0) {
            ++started;
        } else {
            fprintf(stderr, "thread_create failed\n");
        }
    }

    /*
     * `finished` is modified concurrently by the workers; read it with an
     * atomic load instead of as a plain object. The acquire ordering also
     * makes the workers' updates of `counter` visible before it is printed.
     */
    while (__atomic_load_n(&finished, __ATOMIC_ACQUIRE) != started) {
        sleep(1);
    }

    /* counter is unsigned long, so it needs %lu rather than %ld. */
    printf("%lu\n", counter);
    queuelock_destroy(&lock);
    return started == MAX_THREADS ? 0 : 1;
}
Test w Asemblerze:
Kopiuj
# Test harness for the queue lock: the worker function f plus logging
# wrappers around malloc and free. GNU as, Intel operand order without
# register prefixes.
.intel_syntax noprefix
# f is the thread entry point used by the C driver; fake_malloc and
# fake_free are the allocator hooks called by the queue lock.
.globl f
.globl fake_malloc
.globl fake_free
.section .text,"ax",@progbits
# Log messages for fake_malloc. Named labels are used instead of numeric
# ones so the references cannot be mistaken for numeric literals. The
# pointer is printed with %lX because it is a 64-bit value; %X would
# show only its low 32 bits.
        .pushsection .rodata
.Lfake_malloc_msg_before:
        .asciz  "Thread %u Allocates\n"
.Lfake_malloc_msg_after:
        .asciz  "Thread %u Allocated %lX\n"
        .popsection

# ---------------------------------------------------------------------
# void *fake_malloc(size_t size)
# ABI:   System V AMD64
# In:    rdi = size
#        r15 = id of the calling thread (set up by f; not an ABI
#              argument, this is a convention private to the test)
# Out:   rax = whatever malloc(size) returned
# Calls: printf, malloc
# Stack: [rbp-8] holds size across the first printf and afterwards the
#        pointer across the second; rsp is 16-byte aligned at each call.
#
# eax is zeroed before each printf call: for variadic functions the
# ABI expects al to hold the number of vector registers carrying
# arguments, which is none here.
#
# NOTE(review): when the caller was started by thread_create (clone
# without CLONE_SETTLS), malloc and printf run with the creator's
# thread-local state and are presumably not thread-safe - confirm.
# ---------------------------------------------------------------------
fake_malloc:
        push    rbp
        mov     rbp, rsp
        sub     rsp, 16
        mov     QWORD PTR [rbp-8], rdi      # save size

        lea     rdi, [rip+.Lfake_malloc_msg_before]
        mov     rsi, r15                    # thread id
        xor     eax, eax                    # no vector arguments
        call    printf

        mov     rdi, QWORD PTR [rbp-8]      # size
        call    malloc
        mov     QWORD PTR [rbp-8], rax      # save the result across printf

        lea     rdi, [rip+.Lfake_malloc_msg_after]
        mov     rsi, r15                    # thread id
        mov     rdx, rax                    # allocated pointer
        xor     eax, eax                    # no vector arguments
        call    printf

        mov     rax, QWORD PTR [rbp-8]      # return the allocated pointer
        leave
        ret
# Log messages for fake_free. Named labels are used instead of numeric
# ones so the references cannot be mistaken for numeric literals. The
# pointer is printed with %lX because it is a 64-bit value; %X would
# show only its low 32 bits.
        .pushsection .rodata
.Lfake_free_msg_before:
        .asciz  "Thread %u Frees %lX\n"
.Lfake_free_msg_after:
        .asciz  "Thread %u Freed %lX\n"
        .popsection

# ---------------------------------------------------------------------
# void fake_free(void *ptr)
# ABI:   System V AMD64
# In:    rdi = ptr
#        r15 = id of the calling thread (set up by f; not an ABI
#              argument, this is a convention private to the test)
# Out:   nothing meaningful (free returns no value); rax is clobbered
# Calls: printf, free
# Stack: [rbp-8] holds ptr across the calls; rsp is 16-byte aligned at
#        each call.
#
# eax is zeroed before each printf call: for variadic functions the
# ABI expects al to hold the number of vector registers carrying
# arguments, which is none here.
#
# NOTE(review): when the caller was started by thread_create (clone
# without CLONE_SETTLS), free and printf run with the creator's
# thread-local state and are presumably not thread-safe - confirm.
# ---------------------------------------------------------------------
fake_free:
        push    rbp
        mov     rbp, rsp
        sub     rsp, 16
        mov     QWORD PTR [rbp-8], rdi      # save ptr

        mov     rdx, rdi                    # pointer about to be freed
        lea     rdi, [rip+.Lfake_free_msg_before]
        mov     rsi, r15                    # thread id
        xor     eax, eax                    # no vector arguments
        call    printf

        mov     rdi, QWORD PTR [rbp-8]
        call    free

        mov     rdx, QWORD PTR [rbp-8]      # pointer value only; no longer dereferenced
        lea     rdi, [rip+.Lfake_free_msg_after]
        mov     rsi, r15                    # thread id
        xor     eax, eax                    # no vector arguments
        call    printf

        leave
        ret
# ---------------------------------------------------------------------
# int f(void *unused)
# ABI:   System V AMD64
# Out:   eax = 0 after all iterations completed,
#        eax = 1 when queuelock_lock returned NULL (no node available)
# Calls: queuelock_lock, queuelock_unlock; gettid via raw syscall
#
# Worker body of the lock test: 1000000 times acquire the global lock,
# increment the global counter, release the lock. Finally bump the
# global finished count with a LOCK-prefixed increment so the driver
# can tell that this worker is done.
#
# Register roles (all callee-saved, so they survive the calls):
#   rbx  = address of lock
#   r12  = address of counter
#   r13d = iterations remaining
#   r15  = thread id from gettid; read by fake_malloc and fake_free for
#          their log lines
#
# All four are saved on entry and restored in exact reverse order on
# exit. Five pushes after the return address leave rsp 16-byte aligned
# at every call. Globals are addressed RIP-relative so the object can
# be linked into a position-independent executable.
# ---------------------------------------------------------------------
f:
        push    rbp
        mov     rbp, rsp
        push    rbx
        push    r12
        push    r13
        push    r15

        mov     eax, 186                    # syscall = gettid
        syscall                             # gettid()
        mov     r15, rax                    # thread id for the logging hooks

        lea     rbx, [rip+lock]
        lea     r12, [rip+counter]
        mov     r13d, 1000000

.Lf_loop:
        mov     rdi, rbx
        call    queuelock_lock              # rax = our queue node
        test    rax, rax
        je      .Lf_no_node                 # lock not acquired: stop this worker

        inc     QWORD PTR [r12]             # critical section: ++counter

        mov     rdi, rbx
        mov     rsi, rax                    # node returned by queuelock_lock
        call    queuelock_unlock

        dec     r13d
        jne     .Lf_loop

        xor     eax, eax                    # status 0: all iterations done

.Lf_finish:
        lock inc DWORD PTR [rip+finished]   # atomic ++finished; eax is left untouched
        pop     r15
        pop     r13
        pop     r12
        pop     rbx
        pop     rbp
        ret

.Lf_no_node:
        mov     eax, 1                      # status 1: gave up early
        jmp     .Lf_finish