深入理解Linux系统-进程管理

张彤 2023年07月28日 418次浏览

二. 进程管理

这部分我们将理解内核提供的创建/删除进程的功能.

2.1 创建进程

在linux中,创建进程有如下两个目的.

  • 将同一个程序分成多个进程进行处理
  • 创建另外一个程序

为了达成这两个目的,linux分别提供了fork()和execve()函数.

对应系统调用的请求名称则分别为clone()和execve()

2.2 fork函数

要想将同一个程序分成多个进程进行处理,需要用到fork函数.

我们使用python的Process库时,调用的就是fork()函数.在调用该函数后,会基于发出调用的进程创建一个新的进程,发出请求的进程称为父进程,被创建的进程称为子进程.

创建进程的流程图如下

创建进程的流程.jpg

  1. 为子进程申请内存空间,并复制父进程的内存到子进程的内存空间.
  2. 父进程与子进程分裂成两个进程,以执行不同的代码.这一点的实现依赖于fork()函数分别返回不同的值给父进程与子进程.

为了更清楚上述步骤,下面使用C语言举例

  1. 创建一个新进程

  2. 父进程输出自身与子进程的进程ID.而子进程只输出自身的进程ID.

    (base) [root@ecs0003 linux_pro]# vim fork.c
    

    输入后wq保存

    #include <unistd.h>
    #include <stdio.h>
    #include <unistd.h>
    #include <stdio.h>
    #include <stdlib.h>
    #include <err.h>
    
    static void child()
    {
    printf("I'm child! my pid is %d.\n",getpid());
    exit(EXIT_SUCCESS);
    }
    
    /*
     * Parent-side work: print this process's own PID together with the PID
     * it was handed, then terminate with a success status. Does not return.
     *
     * pid_c: PID of the child process, as supplied by the caller.
     */
    static void parent(pid_t pid_c)
    {
        const pid_t self = getpid();

        printf("I'm parnet! my pid is %d and the pid of my child is %d.\n",
               self, pid_c);
        exit(EXIT_SUCCESS);
    }
    
    /*
     * Forks once and dispatches on fork()'s return value: the child runs
     * child(), the parent runs parent() with the child's PID. Both helpers
     * call exit(), so control is not expected to come back to main().
     */
    int main(void)
    {
        pid_t ret = fork();

        if (ret == -1) {
            /* fork() failed: err() appends the errno description. */
            err(EXIT_FAILURE, "fork() failed");
        }
        if (ret == 0) {
            /* fork() returns 0 in the child, so child() is called here. */
            child();
        } else {
            /*
             * fork() returns the PID of the newly created child (a value
             * greater than 0) in the parent, so parent() is called here.
             */
            parent(ret);
        }
        /*
         * Defensive: reached only if child()/parent() returned. No system
         * call failed on this path, so use errx() (no errno text) rather
         * than err(), which would append an unrelated strerror(errno).
         */
        errx(EXIT_FAILURE, "shouldn't reach here!");
    }
    
    

    编译

    (base) [root@ecs0003 linux_pro]# cc -o fork fork.c 
    (base) [root@ecs0003 linux_pro]# ./fork
    I'm parnet! my pid is 187646 and the pid of my child is 187647.
    I'm child! my pid is 187647.
    
    

    使用strace命令

    (base) [root@ecs0003 linux_pro]# strace ./fork
    execve("./fork", ["./fork"], [/* 32 vars */]) = 0
    brk(NULL)                               = 0x21cb000
    mmap(NULL, 4096, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = 0x7f862132a000
    access("/etc/ld.so.preload", R_OK)      = -1 ENOENT (No such file or directory)
    open("/etc/ld.so.cache", O_RDONLY|O_CLOEXEC) = 3
    fstat(3, {st_mode=S_IFREG|0644, st_size=45852, ...}) = 0
    mmap(NULL, 45852, PROT_READ, MAP_PRIVATE, 3, 0) = 0x7f862131e000
    close(3)                                = 0
    open("/lib64/libc.so.6", O_RDONLY|O_CLOEXEC) = 3
    read(3, "\177ELF\2\1\1\3\0\0\0\0\0\0\0\0\3\0>\0\1\0\0\0P%\2\0\0\0\0\0"..., 832) = 832
    fstat(3, {st_mode=S_IFREG|0755, st_size=2173512, ...}) = 0
    mmap(NULL, 3981792, PROT_READ|PROT_EXEC, MAP_PRIVATE|MAP_DENYWRITE, 3, 0) = 0x7f8620d3d000
    mprotect(0x7f8620f00000, 2093056, PROT_NONE) = 0
    mmap(0x7f86210ff000, 24576, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_FIXED|MAP_DENYWRITE, 3, 0x1c2000) = 0x7f86210ff000
    mmap(0x7f8621105000, 16864, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_FIXED|MAP_ANONYMOUS, -1, 0) = 0x7f8621105000
    close(3)                                = 0
    mmap(NULL, 4096, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = 0x7f862131d000
    mmap(NULL, 8192, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = 0x7f862131b000
    arch_prctl(ARCH_SET_FS, 0x7f862131b740) = 0
    mprotect(0x7f86210ff000, 16384, PROT_READ) = 0
    mprotect(0x600000, 4096, PROT_READ)     = 0
    mprotect(0x7f862132b000, 4096, PROT_READ) = 0
    munmap(0x7f862131e000, 45852)           = 0
    # fork()函数在底层发起的是clone()系统调用
    clone(child_stack=0, flags=CLONE_CHILD_CLEARTID|CLONE_CHILD_SETTID|SIGCHLD, child_tidptr=0x7f862131ba10) = 189703
    getpid()                                = 189702
    I'm child! my pid is 189703.
    fstat(1, {st_mode=S_IFCHR|0620, st_rdev=makedev(136, 1), ...}) = 0
    mmap(NULL, 4096, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = 0x7f8621329000
    --- SIGCHLD {si_signo=SIGCHLD, si_code=CLD_EXITED, si_pid=189703, si_uid=0, si_status=0, si_utime=0, si_stime=0} ---
    write(1, "I'm parnet! my pid is 189702 and"..., 64I'm parnet! my pid is 189702 and the pid of my child is 189703.
    ) = 64
    exit_group(0)                           = ?
    +++ exited with 0 +++
    

2.3 execve() 函数

在启动另外一个程序的时候,需要调用execve()函数.

内核运行的流程如下

  1. 读取可执行文件,并读取创建进程的内存映像所需的信息.
  2. 用新进程的数据覆盖当前进程的内存.
  3. 从最初的命令开始运行新的进程.

也就是说,启动另外一个程序的时候,并非新增了一个进程,而是替换了当前进程.

下面详解一下这个过程.

  1. 首先读取可执行文件,以及创建进程的内存映像所需的信息.

    可执行文件中不仅包含进程在运行过程中使用的代码和数据,还包含启动程序时所需的以下信息:

    • 包含代码的代码段在文件中的偏移量,大小,以及内存映像的起始地址
    • 包含代码以外的变量等数据的数据段在文件中的偏移量,大小,以及内存映像的起始地址.
    • 程序执行的第一条指令的内存地址(入口点)

    我们可以使用readelf命令查看可执行文件,可以得到程序头,节头和符号表等信息.其中-S参数表示输出节头信息.

    (base) [root@ecs0003 linux_pro]# readelf -S fork
    There are 31 section headers, starting at offset 0x1a40:
    
    Section Headers:
      [Nr] Name              Type             Address           Offset
           Size              EntSize          Flags  Link  Info  Align
      [ 0]                   NULL             0000000000000000  00000000
           0000000000000000  0000000000000000           0     0     0
      [ 1] .interp           PROGBITS         0000000000400238  00000238
           000000000000001c  0000000000000000   A       0     0     1
      [ 2] .note.ABI-tag     NOTE             0000000000400254  00000254
           0000000000000020  0000000000000000   A       0     0     4
      [ 3] .note.gnu.build-i NOTE             0000000000400274  00000274
           0000000000000024  0000000000000000   A       0     0     4
      [ 4] .gnu.hash         GNU_HASH         0000000000400298  00000298
           000000000000001c  0000000000000000   A       5     0     8
    ....................................................................
           0000000000000221  0000000000000000           0     0     1
      [30] .shstrtab         STRTAB           0000000000000000  00001931
           000000000000010c  0000000000000000           0     0     1
    Key to Flags:
      W (write), A (alloc), X (execute), M (merge), S (strings), I (info),
      L (link order), O (extra OS processing required), G (group), T (TLS),
      C (compressed), x (unknown), o (OS specific), E (exclude),
      l (large), p (processor specific)
    
    # 获取全部信息
    (base) [root@ecs0003 linux_pro]# readelf -a hello
    ELF Header:
      Magic:   7f 45 4c 46 02 01 01 00 00 00 00 00 00 00 00 00 
      Class:                             ELF64
      Data:                              2's complement, little endian
    ................................................
    Section Headers:
      [Nr] Name              Type             Address           Offset
           Size              EntSize          Flags  Link  Info  Align
      [ 0]                   NULL             0000000000000000  00000000
           0000000000000000  0000000000000000           0     0     0
      [ 1] .interp           PROGBITS         0000000000400238  00000238
           000000000000001c  0000000000000000   A       0     0     1
    ..................................................
      [28] .symtab           SYMTAB           0000000000000000  00001060
           0000000000000600  0000000000000018          29    47     8
      [29] .strtab           STRTAB           0000000000000000  00001660
           00000000000001ca  0000000000000000           0     0     1
      [30] .shstrtab         STRTAB           0000000000000000  0000182a
           000000000000010c  0000000000000000           0     0     1
    .................
    Dynamic section at offset 0xe28 contains 24 entries:
      Tag        Type                         Name/Value
     0x0000000000000001 (NEEDED)             Shared library: [libc.so.6]
     0x000000000000000c (INIT)               0x4003c8
     0x000000000000000d (FINI)               0x4005b4
     0x0000000000000019 (INIT_ARRAY)         0x600e10
    ..................
    Symbol table '.symtab' contains 64 entries:
       Num:    Value          Size Type    Bind   Vis      Ndx Name
         0: 0000000000000000     0 NOTYPE  LOCAL  DEFAULT  UND 
         1: 0000000000400238     0 SECTION LOCAL  DEFAULT    1 
         2: 0000000000400254     0 SECTION LOCAL  DEFAULT    2 
         3: 0000000000400274     0 SECTION LOCAL  DEFAULT    3 
         4: 0000000000400298     0 SECTION LOCAL  DEFAULT    4 
         5: 00000000004002b8     0 SECTION LOCAL  DEFAULT    5 
         6: 0000000000400318     0 SECTION LOCAL  DEFAULT    6 
         7: 0000000000400356     0 SECTION LOCAL  DEFAULT    7 
    ...........................
        59: 0000000000400430     0 FUNC    GLOBAL DEFAULT   14 _start
        60: 000000000060102c     0 NOTYPE  GLOBAL DEFAULT   26 __bss_start
        61: 000000000040051d    21 FUNC    GLOBAL DEFAULT   14 main
        62: 0000000000601030     0 OBJECT  GLOBAL HIDDEN    25 __TMC_END__
        63: 00000000004003c8     0 FUNC    GLOBAL DEFAULT   11 _init
    .......................
    Displaying notes found at file offset 0x00000274 with length 0x00000024:
      Owner                 Data size	Description
      GNU                  0x00000014	NT_GNU_BUILD_ID (unique build ID bitstring)
        Build ID: 32226bc83daf861fb356e6625d326eb888c3050f
    
    

    也可以使用objdump -d -M intel -S hello命令来查看

    (base) [root@ecs0003 linux_pro]# objdump -d -M intel -S hello
    
    hello:     file format elf64-x86-64
    
    
    Disassembly of section .init:
    
    00000000004003c8 <_init>:
      4003c8:	48 83 ec 08          	sub    rsp,0x8
      4003cc:	48 8b 05 25 0c 20 00 	mov    rax,QWORD PTR [rip+0x200c25]        # 600ff8 <__gmon_start__>
      4003d3:	48 85 c0             	test   rax,rax
      4003d6:	74 05                	je     4003dd <_init+0x15>
      4003d8:	e8 43 00 00 00       	call   400420 <.plt.got>
      4003dd:	48 83 c4 08          	add    rsp,0x8
      4003e1:	c3                   	ret    
    
    Disassembly of section .plt:
    
    00000000004003f0 <.plt>:
      4003f0:	ff 35 12 0c 20 00    	push   QWORD PTR [rip+0x200c12]        # 601008 <_GLOBAL_OFFSET_TABLE_+0x8>
      4003f6:	ff 25 14 0c 20 00    	jmp    QWORD PTR [rip+0x200c14]        # 601010 <_GLOBAL_OFFSET_TABLE_+0x10>
      4003fc:	0f 1f 40 00          	nop    DWORD PTR [rax+0x0]
    
    0000000000400400 <puts@plt>:
      400400:	ff 25 12 0c 20 00    	jmp    QWORD PTR [rip+0x200c12]        # 601018 <puts@GLIBC_2.2.5>
      400406:	68 00 00 00 00       	push   0x0
      40040b:	e9 e0 ff ff ff       	jmp    4003f0 <.plt>
    
    0000000000400410 <__libc_start_main@plt>:
    ........................................................................................................................
      400583:	4c 89 f6             	mov    rsi,r14
      400586:	44 89 ff             	mov    edi,r15d
      400589:	41 ff 14 dc          	call   QWORD PTR [r12+rbx*8]
      40058d:	48 83 c3 01          	add    rbx,0x1
      400591:	48 39 eb             	cmp    rbx,rbp
      400594:	75 ea                	jne    400580 <__libc_csu_init+0x40>
      400596:	48 83 c4 08          	add    rsp,0x8
      40059a:	5b                   	pop    rbx
      40059b:	5d                   	pop    rbp
      40059c:	41 5c                	pop    r12
      40059e:	41 5d                	pop    r13
      4005a0:	41 5e                	pop    r14
      4005a2:	41 5f                	pop    r15
      4005a4:	c3                   	ret    
      4005a5:	90                   	nop
      4005a6:	66 2e 0f 1f 84 00 00 	nop    WORD PTR cs:[rax+rax*1+0x0]
      4005ad:	00 00 00 
    
    00000000004005b0 <__libc_csu_fini>:
      4005b0:	f3 c3                	repz ret 
    
    Disassembly of section .fini:
    
    00000000004005b4 <_fini>:
      4005b4:	48 83 ec 08          	sub    rsp,0x8
      4005b8:	48 83 c4 08          	add    rsp,0x8
      4005bc:	c3                   	ret    
    
    • 通过objdump,我们可以看到对应代码段编译出的汇编语言命令。

    假设将要运行的程序的可执行文件结构如下:

可执行文件的结构.jpg

与使用高级语言编写的源代码不同,在CPU上执行机器语言指令时,必须提供操作的内存地址,因此在代码段和数据段中必须包含内存映像的起始地址.

在将程序映射到内存之后,从入口点开始运行程序

从入口点开始运行程序.jpg

Linux的可执行文件结构遵循名称为ELF(Executable and Linkable Format,可执行与可链接格式)的格式.

上面已经使用过的readelf命令,配合-h参数就可以看到入口了

(base) [root@ecs0003 linux_pro]# readelf -h hello
ELF Header:
  Magic:   7f 45 4c 46 02 01 01 00 00 00 00 00 00 00 00 00 
  Class:                             ELF64
  Data:                              2's complement, little endian
  Version:                           1 (current)
  OS/ABI:                            UNIX - System V
  ABI Version:                       0
  Type:                              EXEC (Executable file)
  Machine:                           Advanced Micro Devices X86-64
  Version:                           0x1
  # 此处就是入口地址
  Entry point address:               0x400430
  Start of program headers:          64 (bytes into file)
  Start of section headers:          6456 (bytes into file)
  Flags:                             0x0
  Size of this header:               64 (bytes)
  Size of program headers:           56 (bytes)
  Number of program headers:         9
  Size of section headers:           64 (bytes)
  Number of section headers:         31
  Section header string table index: 30

参数-S则提供了节头(section header)的信息

 Section header string table index: 30
(base) [root@ecs0003 linux_pro]# readelf -S hello
There are 31 section headers, starting at offset 0x1938:

Section Headers:
  [Nr] Name              Type             Address           Offset
       Size              EntSize          Flags  Link  Info  Align
  [ 0]                   NULL             0000000000000000  00000000
       0000000000000000  0000000000000000           0     0     0
  [ 1] .interp           PROGBITS         0000000000400238  00000238
       000000000000001c  0000000000000000   A       0     0     1
  [ 2] .note.ABI-tag     NOTE             0000000000400254  00000254
       0000000000000020  0000000000000000   A       0     0     4
  [ 3] .note.gnu.build-i NOTE             0000000000400274  00000274
       0000000000000024  0000000000000000   A       0     0     4
  [ 4] .gnu.hash         GNU_HASH         0000000000400298  00000298
       000000000000001c  0000000000000000   A       5     0     8
  [ 5] .dynsym           DYNSYM           00000000004002b8  000002b8
       0000000000000060  0000000000000018   A       6     1     8
  [ 6] .dynstr           STRTAB           0000000000400318  00000318
..................................................................................
  [28] .symtab           SYMTAB           0000000000000000  00001060
       0000000000000600  0000000000000018          29    47     8
  [29] .strtab           STRTAB           0000000000000000  00001660
       00000000000001ca  0000000000000000           0     0     1
  [30] .shstrtab         STRTAB           0000000000000000  0000182a
       000000000000010c  0000000000000000           0     0     1
Key to Flags:
  W (write), A (alloc), X (execute), M (merge), S (strings), I (info),
  L (link order), O (extra OS processing required), G (group), T (TLS),
  C (compressed), x (unknown), o (OS specific), E (exclude),
  l (large), p (processor specific)

  • 输出的数据每两行为一组.
  • 全部数值均为16进制数据
  • 在每组的第一行第二个字段中,.text对应的是代码段信息,而.data对应的则是数据段的信息
  • 我们只需要关注每组的第一行第四个字段(内存映像的起始地址),第一行第五个字段(在文件中的偏移量),以及第二行第一个字段(大小).

在程序运行期间,进程的内存映像信息,可以从/proc/{pid}/maps这个文件中找到,比如

(base) [root@ecs0003 linux_pro]# /bin/sleep 10000 &
[1] 42182
(base) [root@ecs0003 linux_pro]# cat /proc/42182/maps
# 代码段
00400000-00406000 r-xp 00000000 fd:02 23461                              /usr/bin/sleep
00606000-00607000 r--p 00006000 fd:02 23461                              /usr/bin/sleep
# 数据段
00607000-00608000 rw-p 00007000 fd:02 23461                              /usr/bin/sleep
007d5000-007f6000 rw-p 00000000 00:00 0                                  [heap]
7f8dcf17d000-7f8dd56a6000 r--p 00000000 fd:02 154897                     /usr/lib/locale/locale-archive
7f8dd56a6000-7f8dd5869000 r-xp 00000000 fd:02 16784485                   /usr/lib64/libc-2.17.so
7f8dd5869000-7f8dd5a68000 ---p 001c3000 fd:02 16784485                   /usr/lib64/libc-2.17.so
7f8dd5a68000-7f8dd5a6c000 r--p 001c2000 fd:02 16784485                   /usr/lib64/libc-2.17.so
7f8dd5a6c000-7f8dd5a6e000 rw-p 001c6000 fd:02 16784485                   /usr/lib64/libc-2.17.so
7f8dd5a6e000-7f8dd5a73000 rw-p 00000000 00:00 0 
7f8dd5a73000-7f8dd5a95000 r-xp 00000000 fd:02 16784478                   /usr/lib64/ld-2.17.so
7f8dd5c84000-7f8dd5c87000 rw-p 00000000 00:00 0 
7f8dd5c93000-7f8dd5c94000 rw-p 00000000 00:00 0 
7f8dd5c94000-7f8dd5c95000 r--p 00021000 fd:02 16784478                   /usr/lib64/ld-2.17.so
7f8dd5c95000-7f8dd5c96000 rw-p 00022000 fd:02 16784478                   /usr/lib64/ld-2.17.so
7f8dd5c96000-7f8dd5c97000 rw-p 00000000 00:00 0 
7ffe30beb000-7ffe30c0c000 rw-p 00000000 00:00 0                          [stack]
7ffe30c0f000-7ffe30c11000 r-xp 00000000 00:00 0                          [vdso]
ffffffffff600000-ffffffffff601000 r-xp 00000000 00:00 0                  [vsyscall]

如上,r-xp代表代码段的映射地址,而rw-p则代表数据段

在打算新建一个其他进程时,通常采用被称为fork and exec的方式,即由父进程调用fork()创建子进程,再由子进程调用exec().

下面以echo "hello world"为例查看echo的调用

(base) [root@ecs0003 linux_pro]# strace echo "helloworld"
execve("/bin/echo", ["echo", "helloworld"], [/* 32 vars */]) = 0
brk(NULL)                               = 0x1f3e000
mmap(NULL, 4096, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = 0x7f9ca2326000
access("/etc/ld.so.preload", R_OK)      = -1 ENOENT (No such file or directory)
open("/etc/ld.so.cache", O_RDONLY|O_CLOEXEC) = 3
fstat(3, {st_mode=S_IFREG|0644, st_size=45852, ...}) = 0
mmap(NULL, 45852, PROT_READ, MAP_PRIVATE, 3, 0) = 0x7f9ca231a000
close(3)                                = 0
open("/lib64/libc.so.6", O_RDONLY|O_CLOEXEC) = 3
read(3, "\177ELF\2\1\1\3\0\0\0\0\0\0\0\0\3\0>\0\1\0\0\0P%\2\0\0\0\0\0"..., 832) = 832
fstat(3, {st_mode=S_IFREG|0755, st_size=2173512, ...}) = 0
mmap(NULL, 3981792, PROT_READ|PROT_EXEC, MAP_PRIVATE|MAP_DENYWRITE, 3, 0) = 0x7f9ca1d39000
mprotect(0x7f9ca1efc000, 2093056, PROT_NONE) = 0
mmap(0x7f9ca20fb000, 24576, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_FIXED|MAP_DENYWRITE, 3, 0x1c2000) = 0x7f9ca20fb000
mmap(0x7f9ca2101000, 16864, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_FIXED|MAP_ANONYMOUS, -1, 0) = 0x7f9ca2101000
close(3)                                = 0
mmap(NULL, 4096, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = 0x7f9ca2319000
mmap(NULL, 8192, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = 0x7f9ca2317000
arch_prctl(ARCH_SET_FS, 0x7f9ca2317740) = 0
mprotect(0x7f9ca20fb000, 16384, PROT_READ) = 0
mprotect(0x606000, 4096, PROT_READ)     = 0
mprotect(0x7f9ca2327000, 4096, PROT_READ) = 0
munmap(0x7f9ca231a000, 45852)           = 0
brk(NULL)                               = 0x1f3e000
brk(0x1f5f000)                          = 0x1f5f000

由bash进程创建的echo进程的流程.jpg

2.4 结束进程

可以通过调用_exit()函数(底层发起exit_group()系统调用)来结束进程.

在进程结束后,所有分配给进程的内存将被回收.

进程结束时回收其内存资源.jpg

一般很少直接调用_exit()函数,而是通过调用C标准库中的exit()函数来结束进程的运行.这种情况下,C标准库会在调用完自身的终止处理后调用_exit()函数.从main()函数中返回时,也是以同样的方式结束进程.