1 /* Program execution for Emacs.
2
3 Copyright (C) 2023 Free Software Foundation, Inc.
4
5 This file is part of GNU Emacs.
6
7 GNU Emacs is free software: you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation, either version 3 of the License, or (at
10 your option) any later version.
11
12 GNU Emacs is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
16
17 You should have received a copy of the GNU General Public License
18 along with GNU Emacs. If not, see <https://www.gnu.org/licenses/>. */
19
20 #include <config.h>
21
22 #include <sys/ptrace.h>
23 #include <sys/types.h>
24 #include <sys/wait.h>
25
26 #include <limits.h>
27 #include <stddef.h>
28 #include <string.h>
29 #include <assert.h>
30 #include <signal.h>
31 #include <unistd.h>
32 #include <stdlib.h>
33 #include <errno.h>
34
35 #include "exec.h"
36
37 #include SYSCALL_HEADER
38 #include USER_HEADER
39
40 #ifdef __aarch64__
41 #include <sys/uio.h> /* for struct iovec */
42 #include <linux/elf.h> /* for NT_* */
43 #endif /* __aarch64__ */
44
45 #ifdef HAVE_SYS_UIO_H
46 #include <sys/uio.h> /* for process_vm_readv */
47 #endif /* HAVE_SYS_UIO_H */
48
49 #ifndef SYS_SECCOMP
50 #define SYS_SECCOMP 1
51 #endif /* SYS_SECCOMP */
52
53 #ifndef PTRACE_GETEVENTMSG
54 #define PTRACE_GETEVENTMSG 0x4201
55 #endif /* PTRACE_GETEVENTMSG */
56
57
58
/* Program tracing functions.

   The main entry point is the function `tracing_execve', which asks
   to be traced and then calls exec.  Each time that thread calls
   `clone', the new child is traced as well.

   Callers must use `exec_waitpid' in place of `waitpid'.  */
66
67
68
69 /* Number of tracees children are allowed to create. */
70 #define MAX_TRACEES 4096
71
72 #ifdef __aarch64__
73
74 /* Place PID's registers into *REGS. Return 1 upon failure, else
75 0. */
76
77 int
78 aarch64_get_regs (pid_t pid, USER_REGS_STRUCT *regs)
79 {
80 struct iovec iov;
81
82 iov.iov_base = regs;
83 iov.iov_len = sizeof *regs;
84
85 return (ptrace (PTRACE_GETREGSET, pid, NT_PRSTATUS,
86 &iov) != 0);
87 }
88
89 /* Set PID's registers to *REGS. If SYSCALL_P, also update the
90 current system call number to the `x8' register.
91
92 Value is 1 upon failure, else 0. */
93
94 int
95 aarch64_set_regs (pid_t pid, USER_REGS_STRUCT *regs,
96 bool syscall_p)
97 {
98 struct iovec iov;
99 USER_WORD callno;
100 long rc;
101
102 /* Write the user registers. */
103
104 iov.iov_base = regs;
105 iov.iov_len = sizeof *regs;
106
107 rc = ptrace (PTRACE_SETREGSET, pid, NT_PRSTATUS,
108 &iov);
109 if (rc < 0)
110 return 1;
111
112 /* Now, write the system call number if necessary. */
113
114 if (syscall_p)
115 {
116 callno = regs->regs[8];
117 iov.iov_base = &callno;
118 iov.iov_len = sizeof callno;
119
120 return (ptrace (PTRACE_SETREGSET, pid, NT_ARM_SYSTEM_CALL,
121 &iov) != 0);
122 }
123
124 return 0;
125 }
126
127 #endif /* __aarch64__ */
128
129
130
/* Head of the singly linked list (chained through the `next' field)
   of all processes which are being traced.  New tracees are pushed
   onto the front of this list, and `remove_tracee' unlinks them.  */
static struct exec_tracee *tracing_processes;
133
134
135
136 /* Read N bytes from TRACEE's memory, starting at the specified user
137 ADDRESS. Return its contents in BUFFER.
138
139 If there are unreadable pages within ADDRESS + N, the contents of
140 BUFFER after the first such page becomes undefined. */
141
142 static void
143 read_memory (struct exec_tracee *tracee, char *buffer,
144 USER_WORD n, USER_WORD address)
145 {
146 USER_WORD word, n_words, n_bytes, i;
147 long rc;
148 #ifdef HAVE_PROCESS_VM
149 struct iovec iov, remote;
150
151 /* If `process_vm_readv' is available, use it instead. */
152
153 iov.iov_base = buffer;
154 iov.iov_len = n;
155 remote.iov_base = (void *) address;
156 remote.iov_len = n;
157
158 /* Return immediately if successful. As long as some bytes were
159 read, consider the read to have been a success. */
160
161 if (n <= SSIZE_MAX
162 && ((size_t) process_vm_readv (tracee->pid, &iov, 1,
163 &remote, 1, 0) != -1))
164 return;
165
166 #endif /* HAVE_PROCESS_VM */
167
168 /* First, read entire words from the tracee. */
169 n_words = n & ~(sizeof (USER_WORD) - 1);
170
171 /* Next, determine the number of bytes to read from the last
172 word. */
173 n_bytes = n & (sizeof (USER_WORD) - 1);
174
175 /* Start reading words. */
176 i = 0;
177 while (n_words)
178 {
179 rc = ptrace (PTRACE_PEEKTEXT, tracee->pid,
180 (void *) address + i, NULL);
181 word = rc;
182 memcpy (buffer, &word, sizeof word);
183 buffer += sizeof word;
184 i += sizeof word;
185 n_words -= sizeof word;
186 }
187
188 /* Now, read the remaining bytes. */
189 assert (n_bytes < sizeof (word));
190
191 if (n_bytes)
192 {
193 rc = ptrace (PTRACE_PEEKTEXT, tracee->pid,
194 (void *) address + i, NULL);
195 word = rc;
196
197 /* Copy only n_bytes to the caller. */
198 memcpy (buffer, &word, n_bytes);
199 }
200 }
201
202 /* Allocate N bytes of memory from TRACEE's stack. Return the address
203 of that memory upon success, else 0.
204
205 Place the updated user-mode registers of TRACEE in *NEW_REGS, which
206 should initially contain the current stack pointer of TRACEE.
207
208 REGS should contain the user mode registers of TRACEE prior to the
209 system call starting; it is not updated to reflect any changes. */
210
211 USER_WORD
212 user_alloca (struct exec_tracee *tracee, USER_REGS_STRUCT *regs,
213 USER_REGS_STRUCT *new_regs, USER_WORD n)
214 {
215 USER_WORD sp, old_sp;
216
217 /* Get the current stack pointer. */
218 old_sp = sp = new_regs->STACK_POINTER;
219
220 #if RED_ZONE_SIZE
221 /* Some ABI rules specify a ``red zone'' around the stack pointer
222 that is reserved for compiler optimizations. */
223
224 #ifdef STACK_GROWS_DOWNWARDS
225 if (sp == regs->STACK_POINTER)
226 sp -= RED_ZONE_SIZE;
227 #else /* !STACK_GROWS_DOWNWARDS */
228 if (sp == regs->STACK_POINTER)
229 sp += RED_ZONE_SIZE;
230 #endif /* STACK_GROWS_DOWNWARDS */
231 #endif /* RED_ZONE_SIZE */
232
233 /* Now take N off the stack. */
234
235 #ifdef STACK_GROWS_DOWNWARDS
236 sp = sp - n;
237
238 /* Check for overflow. */
239
240 if (sp > new_regs->STACK_POINTER)
241 return 0;
242 #else /* !STACK_GROWS_DOWNWARDS */
243 sp = sp + n;
244
245 /* Check for overflow. */
246
247 if (sp < new_regs->STACK_POINTER)
248 return 0;
249 #endif /* STACK_GROWS_DOWNWARDS */
250
251 /* Set the stack pointer. */
252 new_regs->STACK_POINTER = sp;
253
254 #ifdef __aarch64__
255 if (aarch64_set_regs (tracee->pid, new_regs, false))
256 goto fail;
257 #else /* !__aarch64__ */
258 if (ptrace (PTRACE_SETREGS, tracee->pid, NULL,
259 new_regs))
260 goto fail;
261 #endif /* __aarch64__ */
262
263 /* Now return the start of the new area. */
264 #ifdef STACK_GROWS_DOWNWARDS
265 return sp;
266 #else /* !STACK_GROWS_DOWNWARDS */
267 return sp - n;
268 #endif /* STACK_GROWS_DOWNWARDS */
269
270 fail:
271 /* Restore the old stack pointer. */
272 new_regs->STACK_POINTER = old_sp;
273 return 0;
274 }
275
276 /* Copy N bytes to ADDRESS in TRACEE's address space from BUFFER.
277 Value is 0 upon success, else 1. */
278
279 int
280 user_copy (struct exec_tracee *tracee, const unsigned char *buffer,
281 USER_WORD address, USER_WORD n)
282 {
283 USER_WORD start, end, word;
284 unsigned char *bytes;
285 #ifdef HAVE_PROCESS_VM
286 struct iovec iov, remote;
287
288 /* Try to use `process_vm_writev' if possible, but fall back to
289 ptrace if something bad happens. */
290
291 iov.iov_base = (void *) buffer;
292 iov.iov_len = n;
293 remote.iov_base = (void *) address;
294 remote.iov_len = n;
295
296 if (n <= SSIZE_MAX
297 && ((size_t) process_vm_writev (tracee->pid, &iov, 1,
298 &remote, 1, 0) == n))
299 return 0;
300 #endif /* HAVE_PROCESS_VM */
301
302 /* Calculate the start and end positions for the write. */
303
304 start = address;
305 end = address + n;
306
307 /* Write from start to the last word. */
308
309 while (start < end)
310 {
311 if (start + sizeof word <= end)
312 {
313 /* Write a word by itself and increment start. */
314 memcpy (&word, buffer, sizeof word);
315 buffer += sizeof word;
316
317 if (ptrace (PTRACE_POKEDATA, tracee->pid,
318 (void *) start, (void *) word))
319 return 1;
320
321 start += sizeof word;
322 }
323 else
324 {
325 /* Only end - start bytes should be written.
326 Read the word at start from tracee->pid, then write
327 it back with changes. */
328
329 word = ptrace (PTRACE_PEEKDATA, tracee->pid,
330 (void *) start, NULL);
331 bytes = (unsigned char *) &word;
332 memcpy (bytes, buffer, end - start);
333
334 if (ptrace (PTRACE_POKEDATA, tracee->pid,
335 (void *) start, (void *) word))
336 return 1;
337
338 /* Writing was successful. */
339 return 0;
340 }
341 }
342
343 return 0;
344 }
345
346
347
/* Chain of free exec_tracee structures, linked through their `next'
   fields.  `remove_tracee' pushes structures onto this chain, and the
   functions creating tracees take structures from it before
   allocating new ones.  */
static struct exec_tracee *free_tracees;
350
351 /* Remove the specified TRACEE from the chain of all processes being
352 traced. */
353
354 static void
355 remove_tracee (struct exec_tracee *tracee)
356 {
357 struct exec_tracee **last;
358
359 last = &tracing_processes;
360 while (*last)
361 {
362 if (*last == tracee)
363 {
364 *last = tracee->next;
365
366 /* Link the tracee onto the list of free tracees. */
367 tracee->next = free_tracees;
368
369 #ifndef REENTRANT
370 /* Free the exec file, if any. */
371 free (tracee->exec_file);
372 tracee->exec_file = NULL;
373 #endif /* REENTRANT */
374
375 free_tracees = tracee;
376
377 return;
378 }
379 else
380 last = &(*last)->next;
381 }
382 }
383
384
385
386 /* Child process tracing. */
387
/* Statically allocated array of `struct exec_tracee's, from which
   tracee structures are handed out once the free chain is empty.  */
static struct exec_tracee static_tracees[MAX_TRACEES];

/* Number of elements of `static_tracees' handed out so far, which is
   also the index of the next unused element.  */
static int tracees;
393
394 /* Return the `struct exec_tracee' corresponding to the specified
395 PROCESS. */
396
397 static struct exec_tracee *
398 find_tracee (pid_t process)
399 {
400 struct exec_tracee *tracee;
401
402 for (tracee = tracing_processes; tracee; tracee = tracee->next)
403 {
404 if (tracee->pid == process)
405 return tracee;
406 }
407
408 return NULL;
409 }
410
411 /* Prepare to handle the completion of a `clone' system call.
412
413 If the new clone is not yet being traced, create a new tracee for
414 PARENT's child, copying over its current command line. Then, set
415 `new_child' in the new tracee. Otherwise, continue it until the
416 next syscall. */
417
418 static void
419 handle_clone_prepare (struct exec_tracee *parent)
420 {
421 #ifndef REENTRANT
422 long rc;
423 unsigned long pid;
424 struct exec_tracee *tracee;
425
426 rc = ptrace (PTRACE_GETEVENTMSG, parent->pid, NULL,
427 &pid);
428 if (rc)
429 return;
430
431 /* See if the tracee already exists. */
432 tracee = find_tracee (pid);
433
434 if (tracee)
435 {
436 /* Continue the tracee. Record its command line, as that has
437 not yet been done. */
438
439 assert (tracee->new_child);
440 tracee->new_child = false;
441 tracee->exec_file = NULL;
442 ptrace (PTRACE_SYSCALL, tracee->pid, 0, 0);
443
444 if (parent->exec_file)
445 tracee->exec_file = strdup (parent->exec_file);
446 return;
447 }
448
449 if (free_tracees)
450 {
451 tracee = free_tracees;
452 free_tracees = free_tracees->next;
453 }
454 else if (tracees < MAX_TRACEES)
455 {
456 tracee = &static_tracees[tracees];
457 tracees++;
458 }
459 #ifndef REENTRANT
460 /* Try to allocate a tracee using `malloc' if this library is
461 not being built to run inside a signal handler. */
462 else if ((tracee = malloc (sizeof *tracee)))
463 ;
464 #endif /* REENTRANT */
465 else
466 return;
467
468 tracee->pid = pid;
469 tracee->next = tracing_processes;
470 tracee->waiting_for_syscall = false;
471 tracee->new_child = true;
472 tracee->exec_file = NULL;
473 tracing_processes = tracee;
474
475 /* Copy over the command line. */
476
477 if (parent->exec_file)
478 tracee->exec_file = strdup (parent->exec_file);
479 #endif /* REENTRANT */
480 }
481
482 /* Handle the completion of a `clone' or `clone3' system call,
483 resulting in the creation of the process PID. If TRACEE is NULL,
484 allocate a new tracee structure from a static area for the
485 processes's pid, then set TRACEE->new_child to true and await the
486 parent's corresponding ptrace event to arrive; otherwise, just
487 clear TRACEE->new_child.
488
489 Value is 0 upon success, 2 if TRACEE should remain suspended until
490 the parent's ptrace-stop, and 1 otherwise. */
491
492 static int
493 handle_clone (struct exec_tracee *tracee, pid_t pid)
494 {
495 long rc;
496 int flags, value;
497
498 /* Now allocate a new tracee, either from static_tracees or the free
499 list, if no tracee was supplied. */
500
501 value = 0;
502
503 if (!tracee)
504 {
505 if (free_tracees)
506 {
507 tracee = free_tracees;
508 free_tracees = free_tracees->next;
509 }
510 else if (tracees < MAX_TRACEES)
511 {
512 tracee = &static_tracees[tracees];
513 tracees++;
514 }
515 #ifndef REENTRANT
516 /* Try to allocate a tracee using `malloc' if this library is
517 not being built to run inside a signal handler. */
518 else if ((tracee = malloc (sizeof *tracee)))
519 ;
520 #endif /* REENTRANT */
521 else
522 return 1;
523
524 tracee->pid = pid;
525 tracee->next = tracing_processes;
526 tracee->waiting_for_syscall = false;
527 #ifndef REENTRANT
528 tracee->exec_file = NULL;
529 #endif /* REENTRANT */
530 tracing_processes = tracee;
531 tracee->new_child = true;
532
533 /* Wait for the ptrace-stop to happen in the parent. */
534 value = 2;
535 }
536 else
537 /* Clear the flag saying that this is a newly created child
538 process. */
539 tracee->new_child = false;
540
541 /* Apply required options to the child, so that the kernel
542 automatically traces children and makes it easy to differentiate
543 between system call traps and other kinds of traps. */
544
545 flags = PTRACE_O_TRACECLONE;
546 flags |= PTRACE_O_TRACEVFORK;
547 flags |= PTRACE_O_TRACEFORK;
548 flags |= PTRACE_O_TRACESYSGOOD;
549 flags |= PTRACE_O_TRACEEXIT;
550
551 rc = ptrace (PTRACE_SETOPTIONS, pid, 0, flags);
552
553 if (rc)
554 goto bail;
555
556 if (value != 2)
557 {
558 /* The new tracee is currently stopped. Continue it until the next
559 system call. */
560
561 rc = ptrace (PTRACE_SYSCALL, pid, 0, 0);
562
563 if (rc)
564 goto bail;
565 }
566
567 return value;
568
569 bail:
570 remove_tracee (tracee);
571 return 1;
572 }
573
574
575
576 /* NOTICE: none of these functions should ever call `malloc' or
577 another async signal unsafe function. */
578
/* File name of the loader binary.  `handle_exec' copies this string
   onto the tracee's stack and substitutes it for the file name
   argument of each exec system call.  */
static const char *loader_name;
581
582
583
/* Return whether or not the trap described by SIGNAL was generated
   by a system call being attempted by a tracee, which is the case
   when its `si_code' is SIGTRAP, or SIGTRAP combined with
   SI_KERNEL.  */

static bool
syscall_trap_p (siginfo_t *signal)
{
  switch (signal->si_code)
    {
    case SIGTRAP:
    case SIGTRAP | SI_KERNEL:
      /* Delivered by the kernel: this is a system call stop.  */
      return true;

    default:
      return false;
    }
}
595
596 /* Check if the wait status STATUS indicates a system call trap.
597 TRACEE is the process whose stop STATUS describes. If TRACEE exits
598 while this information is being determined, return -1; if STATUS
599 indicates some other kind of stop, return 1 after continuing
600 TRACEE. Value is 0 otherwise. */
601
602 static int
603 check_signal (struct exec_tracee *tracee, int status)
604 {
605 siginfo_t siginfo;
606
607 switch ((status & 0xfff00) >> 8)
608 {
609 case SIGTRAP:
610 /* Now, use PTRACE_GETSIGINFO to determine whether or not the
611 signal was delivered in response to a system call. */
612
613 if (ptrace (PTRACE_GETSIGINFO, tracee->pid, 0, &siginfo))
614 return -1;
615
616 if (!syscall_trap_p (&siginfo))
617 {
618 if (siginfo.si_code < 0)
619 /* SIGTRAP delivered from userspace. Pass it on. */
620 ptrace (PTRACE_SYSCALL, tracee->pid, 0, SIGTRAP);
621 else
622 ptrace (PTRACE_SYSCALL, tracee->pid, 0, 0);
623
624 return 1;
625 }
626
627 case SIGTRAP | 0x80: /* SIGTRAP | 0x80 specifically refers to
628 system call traps. */
629 break;
630
631 #ifdef SIGSYS
632 case SIGSYS:
633 if (ptrace (PTRACE_GETSIGINFO, tracee->pid, 0, &siginfo))
634 return -1;
635
636 /* Continue the process until the next syscall, but don't
637 pass through the signal if an emulated syscall led to
638 it. */
639 #ifdef HAVE_SIGINFO_T_SI_SYSCALL
640 #ifndef __arm__
641 ptrace (PTRACE_SYSCALL, tracee->pid,
642 0, ((siginfo.si_code == SYS_SECCOMP
643 && siginfo.si_syscall == -1)
644 ? 0 : status));
645 #else /* __arm__ */
646 ptrace (PTRACE_SYSCALL, tracee->pid,
647 0, ((siginfo.si_code == SYS_SECCOMP
648 && siginfo.si_syscall == 222)
649 ? 0 : status));
650 #endif /* !__arm__ */
651 #else /* !HAVE_SIGINFO_T_SI_SYSCALL */
652 /* Drop this signal, since what caused it is unknown. */
653 ptrace (PTRACE_SYSCALL, tracee->pid, 0, 0);
654 #endif /* HAVE_SIGINFO_T_SI_SYSCALL */
655 return 1;
656 #endif /* SIGSYS */
657
658 default:
659 /* Continue the process until the next syscall. */
660 ptrace (PTRACE_SYSCALL, tracee->pid, 0, status);
661 return 1;
662 }
663
664 return 0;
665 }
666
667
668
/* Handle an `exec' system call from the given TRACEE.  REGS are the
   tracee's current user-mode registers; they are modified as the
   system call is rewritten, and replaced with the registers read
   back from TRACEE once the system call has completed.

   Rewrite the system call arguments to use the loader binary.  Then,
   continue the system call until the loader is loaded.  Write the
   information necessary to load the original executable into the
   loader's stack.

   Value is 0 upon success, 1 upon a generic failure before the loader
   is loaded, 2 if the process has stopped, and 3 if something failed,
   but it is too late to handle it.  3 is also returned if the exec
   system call itself failed.

   Set errno appropriately upon returning a generic failure.  */

static int
handle_exec (struct exec_tracee *tracee, USER_REGS_STRUCT *regs)
{
  char buffer[PATH_MAX + 80], *area;
  USER_REGS_STRUCT original;
  size_t size, loader_size;
  USER_WORD loader, size1, sp;
  int rc, wstatus;
  siginfo_t siginfo;

  /* Save the old stack pointer, so that it can be restored if the
     exec system call fails.  */
  sp = regs->STACK_POINTER;

  /* Read the file name, which is the first system call argument.  */
  read_memory (tracee, buffer, PATH_MAX,
               regs->SYSCALL_ARG_REG);

  /* Make sure BUFFER is NUL terminated.  */

  if (!memchr (buffer, '\0', PATH_MAX))
    {
      errno = ENAMETOOLONG;
      return 1;
    }

  /* Copy over the registers as they originally were.  */
  memcpy (&original, regs, sizeof *regs);

  /* Figure out what the loader needs to do.  AREA is the data to
     hand to the loader, and SIZE is its size in bytes.  */
 again1:
  area = exec_0 (buffer, tracee, &size, regs);

  if (!area)
    {
      /* Retry if the failure was an EINTR caused by IO.  */
      if (errno == EINTR)
        goto again1;

      return 1;
    }

  /* Rewrite the first argument to point to the loader.  To do so,
     place a copy of the loader's file name (including its
     terminating NUL) on the tracee's stack.  */

  loader_size = strlen (loader_name) + 1;
  loader = user_alloca (tracee, &original, regs,
                        loader_size);

  if (!loader)
    {
      errno = ENOMEM;
      return 1;
    }

  if (user_copy (tracee, (unsigned char *) loader_name,
                 loader, loader_size))
    {
      errno = EIO;
      return 1;
    }

  regs->SYSCALL_ARG_REG = loader;

#ifdef __aarch64__

  if (aarch64_set_regs (tracee->pid, regs, false))
    {
      errno = EIO;
      return 1;
    }

#else /* !__aarch64__ */

  if (ptrace (PTRACE_SETREGS, tracee->pid, NULL,
              regs))
    {
      errno = EIO;
      return 1;
    }

#endif /* __aarch64__ */

  /* Continue the system call until loader starts.  */

  if (ptrace (PTRACE_SYSCALL, tracee->pid, NULL, NULL))
    {
      errno = EIO;
      return 1;
    }

#ifndef REENTRANT
  /* Now that the tracee has been continued, record the value to use
     for /proc/self/exe.  Don't give up just because strdup fails.

     Note that exec_0 copies the absolute file name into buffer.  */

  if (tracee->exec_file)
    free (tracee->exec_file);
  tracee->exec_file = strdup (buffer);
#endif /* REENTRANT */

  /* Wait for the tracee to stop again, retrying waitpid whenever it
     is interrupted.  */
 again:
  rc = waitpid (tracee->pid, &wstatus, __WALL);
  if (rc == -1 && errno == EINTR)
    goto again;

  if (rc < 0)
    return 1;

  if (!WIFSTOPPED (wstatus))
    /* The process has been killed in response to a signal.
       In this case, simply return 2.  */
    return 2;
  else
    {
      /* Then, check if STATUS is not a syscall-stop, and try again if
         it isn't.  check_signal has already continued the tracee in
         that case.  */
      rc = check_signal (tracee, wstatus);

      if (rc == -1)
        return 2;
      else if (rc)
        goto again;

      /* Retrieve the signal information and determine whether or not
         the system call has completed.  */

      if (ptrace (PTRACE_GETSIGINFO, tracee->pid, 0,
                  &siginfo))
        return 3;

      if (!syscall_trap_p (&siginfo))
        {
          /* Continue.  */
          if (ptrace (PTRACE_SYSCALL, tracee->pid, 0, 0))
            return 3;

          goto again;
        }
    }

#ifdef __aarch64__

  if (aarch64_get_regs (tracee->pid, &original))
    return 3;

#else /* !__aarch64__ */

  /* The system call has now completed.  Get the registers again.  */

  if (ptrace (PTRACE_GETREGS, tracee->pid, NULL,
              &original))
    return 3;

#endif /* __aarch64__ */

  *regs = original;

  /* A nonzero return value register means the exec system call
     failed.  In that case, restore the stack pointer saved at the
     start and return 3.  No request to continue the tracee is made
     on this path.
     NOTE(review): confirm that leaving the tracee stopped here is
     intended.  */

  if (original.SYSCALL_RET_REG)
    {
      /* Restore the original stack pointer.  */
      regs->STACK_POINTER = sp;

#ifdef __aarch64__
      aarch64_set_regs (tracee->pid, regs, false);
#else /* !__aarch64__ */
      ptrace (PTRACE_SETREGS, tracee->pid, NULL, regs);
#endif /* __aarch64__ */

      goto exec_failure;
    }

  /* Write the loader area to the stack, followed by its size and the
     original stack pointer.  The resulting layout, starting at
     LOADER, is one word holding the stack pointer read back after
     the system call, one word holding SIZE, and then the SIZE bytes
     of AREA.  */

  loader = user_alloca (tracee, &original, regs,
                        size + sizeof loader * 2);
  if (!loader)
    return 3;

  size1 = size;

#ifndef STACK_GROWS_DOWNWARDS

  NOT_IMPLEMENTED;

#else /* STACK_GROWS_DOWNWARDS */

  if (user_copy (tracee, (unsigned char *) area,
                 loader + sizeof size1 * 2, size)
      || user_copy (tracee, (unsigned char *) &size1,
                    loader + sizeof size1, sizeof size1))
    return 3;

  /* SIZE1 is reused to hold the stack pointer to be written.  */
  size1 = original.STACK_POINTER;

  if (user_copy (tracee, (unsigned char *) &size1,
                 loader, sizeof size1))
    return 3;

#endif /* STACK_GROWS_DOWNWARDS */

  /* Continue.  */
  if (ptrace (PTRACE_SYSCALL, tracee->pid, 0, 0))
    return 3;

  return 0;

 exec_failure:
  return 3;
}
896
897 /* Handle a `readlink' or `readlinkat' system call.
898
899 CALLNO is the system call number, and REGS are the current user
900 registers of the TRACEE.
901
902 If the file name specified in either a `readlink' or `readlinkat'
903 system call is `/proc/self/exe', write the name of the executable
904 being run into the buffer specified in the system call. Do not
905 handle relative file names at the moment.
906
907 Return the number of bytes written to the tracee's buffer in
908 *RESULT.
909
910 Value is 0 upon success. Value is 1 upon failure, and 2 if the
911 system call has been emulated. */
912
913 static int
914 handle_readlinkat (USER_WORD callno, USER_REGS_STRUCT *regs,
915 struct exec_tracee *tracee, USER_WORD *result)
916 {
917 #ifdef REENTRANT
918 /* readlinkat cannot be handled specially when the library is built
919 to be reentrant, as the file name information cannot be
920 recorded. */
921 return 0;
922 #else /* !REENTRANT */
923
924 char buffer[PATH_MAX + 1];
925 USER_WORD address, return_buffer, size;
926 size_t length;
927
928 /* Read the file name. */
929
930 #ifdef READLINK_SYSCALL
931 if (callno == READLINK_SYSCALL)
932 {
933 address = regs->SYSCALL_ARG_REG;
934 return_buffer = regs->SYSCALL_ARG1_REG;
935 size = regs->SYSCALL_ARG2_REG;
936 }
937 else
938 #endif /* READLINK_SYSCALL */
939 {
940 address = regs->SYSCALL_ARG1_REG;
941 return_buffer = regs->SYSCALL_ARG2_REG;
942 size = regs->SYSCALL_ARG3_REG;
943 }
944
945 read_memory (tracee, buffer, PATH_MAX, address);
946
947 /* Make sure BUFFER is NULL terminated. */
948
949 if (!memchr (buffer, '\0', PATH_MAX))
950 {
951 errno = ENAMETOOLONG;
952 return 1;
953 }
954
955 /* Now check if the caller is looking for /proc/self/exe.
956
957 dirfd can be ignored, as for now only absolute file names are
958 handled. FIXME. */
959
960 if (strcmp (buffer, "/proc/self/exe") || !tracee->exec_file)
961 return 0;
962
963 /* Copy over tracee->exec_file. Truncate it to PATH_MAX, length, or
964 size, whichever is less. */
965
966 length = strlen (tracee->exec_file);
967 length = MIN (size, MIN (PATH_MAX, length));
968 strncpy (buffer, tracee->exec_file, length);
969
970 if (user_copy (tracee, (unsigned char *) buffer,
971 return_buffer, length))
972 {
973 errno = EIO;
974 return 1;
975 }
976
977 *result = length;
978 return 2;
979 #endif /* REENTRANT */
980 }
981
982 /* Process the system call at which TRACEE is stopped. If the system
983 call is not known or not exec, send TRACEE on its way. Otherwise,
984 rewrite it to load the loader and perform an appropriate action. */
985
986 static void
987 process_system_call (struct exec_tracee *tracee)
988 {
989 USER_REGS_STRUCT regs;
990 int rc, wstatus, save_errno;
991 USER_WORD callno, sp;
992 USER_WORD result;
993 bool reporting_error;
994
995 #ifdef __aarch64__
996 rc = aarch64_get_regs (tracee->pid, ®s);
997 #else /* !__aarch64__ */
998 rc = ptrace (PTRACE_GETREGS, tracee->pid, NULL,
999 ®s);
1000 #endif /* __aarch64__ */
1001
1002 /* TODO: what to do if this fails? */
1003 if (rc < 0)
1004 return;
1005
1006 /* Save the stack pointer. */
1007 sp = regs.STACK_POINTER;
1008
1009 /* Now dispatch based on the system call. */
1010 callno = regs.SYSCALL_NUM_REG;
1011 switch (callno)
1012 {
1013 case EXEC_SYSCALL:
1014
1015 /* exec system calls should be handled synchronously. */
1016 assert (!tracee->waiting_for_syscall);
1017 rc = handle_exec (tracee, ®s);
1018
1019 switch (rc)
1020 {
1021 case 3:
1022 /* It's too late to do anything about this error,. */
1023 break;
1024
1025 case 2:
1026 /* The process has gone away. */
1027 remove_tracee (tracee);
1028 break;
1029
1030 case 1:
1031 /* An error has occured; errno is set to the error. */
1032 goto report_syscall_error;
1033 }
1034
1035 break;
1036
1037 #ifdef READLINK_SYSCALL
1038 case READLINK_SYSCALL:
1039 #endif /* READLINK_SYSCALL */
1040 case READLINKAT_SYSCALL:
1041
1042 /* Handle this readlinkat system call. */
1043 rc = handle_readlinkat (callno, ®s, tracee,
1044 &result);
1045
1046 /* rc means the same as in `handle_exec'. */
1047
1048 if (rc == 1)
1049 goto report_syscall_error;
1050 else if (rc == 2)
1051 goto emulate_syscall;
1052
1053 /* Fallthrough. */
1054
1055 default:
1056 /* Don't wait for the system call to finish; instead, the system
1057 will DTRT upon the next call to PTRACE_SYSCALL after the
1058 syscall-trap signal is delivered. */
1059
1060 rc = ptrace (PTRACE_SYSCALL, tracee->pid,
1061 NULL, NULL);
1062 if (rc < 0)
1063 return;
1064
1065 tracee->waiting_for_syscall = !tracee->waiting_for_syscall;
1066 }
1067
1068 return;
1069
1070 report_syscall_error:
1071 reporting_error = true;
1072 goto common;
1073
1074 emulate_syscall:
1075 reporting_error = false;
1076 common:
1077
1078 /* Reporting an error or emulating a system call works by setting
1079 the system call number to -1, letting it continue, and then
1080 substituting errno for ENOSYS in the case of an error.
1081
1082 Make sure that the stack pointer is restored to its original
1083 position upon exit, or bad things can happen. */
1084
1085 /* First, save errno; system calls below will clobber it. */
1086 save_errno = errno;
1087
1088 regs.SYSCALL_NUM_REG = -1;
1089 regs.STACK_POINTER = sp;
1090
1091 #ifdef __aarch64__
1092 if (aarch64_set_regs (tracee->pid, ®s, true))
1093 return;
1094 #else /* !__aarch64__ */
1095
1096 #ifdef __arm__
1097 /* On ARM systems, a special request is used to update the system
1098 call number as known to the kernel. In addition, the system call
1099 number must be valid, so use `tuxcall'. Hopefully, nobody will
1100 run this on a kernel with Tux. */
1101
1102 if (ptrace (PTRACE_SET_SYSCALL, tracee->pid, NULL, 222))
1103 return;
1104 #endif /* __arm__ */
1105
1106 if (ptrace (PTRACE_SETREGS, tracee->pid, NULL, ®s))
1107 return;
1108 #endif /* __aarch64__ */
1109
1110 /* Do this invalid system call. */
1111 if (ptrace (PTRACE_SYSCALL, tracee->pid, NULL, NULL))
1112 return;
1113
1114 again1:
1115 rc = waitpid (tracee->pid, &wstatus, __WALL);
1116 if (rc == -1 && errno == EINTR)
1117 goto again1;
1118
1119 /* Return if waitpid fails. */
1120
1121 if (rc == -1)
1122 return;
1123
1124 /* If the process received a signal, see if the signal is SIGSYS and
1125 from seccomp. If so, discard it. */
1126
1127 if (WIFSTOPPED (wstatus))
1128 {
1129 rc = check_signal (tracee, wstatus);
1130
1131 if (rc == -1)
1132 return;
1133 else if (rc)
1134 goto again1;
1135 }
1136
1137 if (!WIFSTOPPED (wstatus))
1138 /* The process has been killed in response to a signal. In this
1139 case, simply unlink the tracee and return. */
1140 remove_tracee (tracee);
1141 else if (reporting_error)
1142 {
1143 #ifdef __mips__
1144 /* MIPS systems place errno in v0 and set a3 to 1. */
1145 regs.gregs[2] = save_errno;
1146 regs.gregs[7] = 1;
1147 #else /* !__mips__ */
1148 regs.SYSCALL_RET_REG = -save_errno;
1149 #endif /* __mips__ */
1150
1151 /* Report errno. */
1152 #ifdef __aarch64__
1153 aarch64_set_regs (tracee->pid, ®s, false);
1154 #else /* !__aarch64__ */
1155 ptrace (PTRACE_SETREGS, tracee->pid, NULL, ®s);
1156 #endif /* __aarch64__ */
1157
1158 /* Now wait for the next system call to happen. */
1159 ptrace (PTRACE_SYSCALL, tracee->pid, NULL, NULL);
1160 }
1161 else
1162 {
1163 /* No error is being reported. Return the result in the
1164 appropriate registers. */
1165
1166 #ifdef __mips__
1167 /* MIPS systems place errno in v0 and set a3 to 1. */
1168 regs.gregs[2] = result;
1169 regs.gregs[7] = 0;
1170 #else /* !__mips__ */
1171 regs.SYSCALL_RET_REG = result;
1172 #endif /* __mips__ */
1173
1174 /* Report errno. */
1175 #ifdef __aarch64__
1176 aarch64_set_regs (tracee->pid, ®s, false);
1177 #else /* !__aarch64__ */
1178 ptrace (PTRACE_SETREGS, tracee->pid, NULL, ®s);
1179 #endif /* __aarch64__ */
1180
1181 /* Now wait for the next system call to happen. */
1182 ptrace (PTRACE_SYSCALL, tracee->pid, NULL, NULL);
1183 }
1184 }
1185
1186
1187
/* Like `execve', but first ask to be traced by the parent and stop,
   so that the parent can notice this thread before FILE is executed
   with the arguments ARGV and the environment ENVP.

   If the request to be traced fails, return its value without
   calling `execve'.  */

int
tracing_execve (const char *file, char *const *argv,
                char *const *envp)
{
  int status;

  /* Start tracing self.  */
  status = ptrace (PTRACE_TRACEME, 0, NULL, NULL);

  if (status != 0)
    return status;

  /* Stop, so that the parent sees a signal-delivery-stop.  */
  raise (SIGSTOP);

  return execve (file, argv, envp);
}
1206
1207 /* Wait for PID to trace itself, and make a record of that process.
1208 Value is 1 or 2 upon failure, 0 otherwise. Make sure that SIGCHLD
1209 is blocked around calls to this function.
1210
1211 If failure occurs because PID exited, value is 2; upon any other
1212 kind of failure, value is 1. */
1213
1214 int
1215 after_fork (pid_t pid)
1216 {
1217 int wstatus, rc, flags;
1218 struct exec_tracee *tracee;
1219
1220 /* First, wait for something to happen to PID. */
1221 again:
1222 rc = waitpid (pid, &wstatus, __WALL);
1223 if (rc != pid && errno == EINTR)
1224 goto again;
1225
1226 if (rc != pid)
1227 return 1;
1228
1229 /* If the child exited (or in general wasn't traced), return 2. */
1230
1231 if (!WIFSTOPPED (wstatus))
1232 return 2;
1233
1234 /* Apply required options to the child, so that the kernel
1235 automatically traces children and makes it easy to differentiate
1236 between system call traps and other kinds of traps. */
1237
1238 flags = PTRACE_O_TRACECLONE;
1239 flags |= PTRACE_O_TRACEVFORK;
1240 flags |= PTRACE_O_TRACEFORK;
1241 flags |= PTRACE_O_TRACESYSGOOD;
1242 flags |= PTRACE_O_TRACEEXIT;
1243
1244 rc = ptrace (PTRACE_SETOPTIONS, pid, 0, flags);
1245
1246 if (rc)
1247 {
1248 /* If the kernel can't trace child processes upon creation and
1249 exit, then it can't work reliably. */
1250 ptrace (PTRACE_DETACH, pid, 0, 0);
1251 return 1;
1252 }
1253
1254 /* Request that the child stop upon the next system call. */
1255 rc = ptrace (PTRACE_SYSCALL, pid, 0, 0);
1256 if (rc)
1257 return 1;
1258
1259 /* Enter the child in `tracing_processes'. */
1260
1261 if (free_tracees)
1262 {
1263 tracee = free_tracees;
1264 free_tracees = free_tracees->next;
1265 }
1266 else
1267 tracee = malloc (sizeof *tracee);
1268
1269 if (!tracee)
1270 return 1;
1271
1272 tracee->pid = pid;
1273 tracee->next = tracing_processes;
1274 tracee->waiting_for_syscall = false;
1275 tracee->new_child = false;
1276 #ifndef REENTRANT
1277 tracee->exec_file = NULL;
1278 #endif /* REENTRANT */
1279 tracing_processes = tracee;
1280 return 0;
1281 }
1282
1283 /* Wait for a child process to exit, like `waitpid'. However, if a
1284 child stops to perform a system call, send it on its way and return
1285 -1. OPTIONS must not contain WUNTRACED. */
1286
1287 pid_t
1288 exec_waitpid (pid_t pid, int *wstatus, int options)
1289 {
1290 int status;
1291 struct exec_tracee *tracee;
1292 siginfo_t siginfo;
1293
1294 pid = waitpid (pid, &status, options | __WALL);
1295 if (pid < 0)
1296 return pid;
1297
1298 /* Copy status into *WSTATUS if specified. */
1299 if (wstatus)
1300 *wstatus = status;
1301
1302 /* WIFSTOPPED (status) means that the process has been stopped in
1303 response to a system call. Find its tracee and process the
1304 system call. */
1305
1306 if (WIFSTOPPED (status))
1307 {
1308 tracee = find_tracee (pid);
1309
1310 if (!tracee || tracee->new_child)
1311 {
1312 if (WSTOPSIG (status) == SIGSTOP)
1313 /* A new process has been created and stopped. Record
1314 it now. */
1315 handle_clone (tracee, pid);
1316
1317 return -1;
1318 }
1319
1320 /* Now extract the stop signal, including ptrace event bits. */
1321 status &= 0xfff00;
1322 status = status >> 8;
1323
1324 switch (status)
1325 {
1326 case SIGTRAP:
1327 /* Now, use PTRACE_GETSIGINFO to determine whether or not the
1328 signal was delivered in response to a system call. */
1329
1330 if (ptrace (PTRACE_GETSIGINFO, pid, 0, &siginfo))
1331 return -1;
1332
1333 if (!syscall_trap_p (&siginfo))
1334 {
1335 if (siginfo.si_code < 0)
1336 /* SIGTRAP delivered from userspace. Pass it on. */
1337 ptrace (PTRACE_SYSCALL, pid, 0, SIGTRAP);
1338 else
1339 ptrace (PTRACE_SYSCALL, pid, 0, 0);
1340
1341 return -1;
1342 }
1343
1344 case SIGTRAP | 0x80: /* SIGTRAP | 0x80 specifically refers to
1345 system call traps. */
1346 /* Otherwise, process the system call and continue waiting. */
1347 process_system_call (tracee);
1348 return -1;
1349
1350 case SIGTRAP | (PTRACE_EVENT_EXIT << 8):
1351 /* The tracee has exited. Make it finish correctly. */
1352 ptrace (PTRACE_SYSCALL, pid, 0, 0);
1353 remove_tracee (tracee);
1354 return -1;
1355
1356 case SIGTRAP | (PTRACE_EVENT_FORK << 8):
1357 case SIGTRAP | (PTRACE_EVENT_VFORK << 8):
1358 case SIGTRAP | (PTRACE_EVENT_CLONE << 8):
1359
1360 /* Both PTRACE_EVENT_CLONE and SIGSTOP must arrive before a
1361 process is continued. Otherwise, its parent's cmdline
1362 cannot be obtained and propagated.
1363
1364 If the PID of the new process is currently not being
1365 traced, create a new tracee. Set `new_child' to true,
1366 and copy over the old command line in preparation for a
1367 SIGSTOP signal being delivered to it.
1368
1369 Otherwise, start the tracee running until the next
1370 syscall. */
1371
1372 handle_clone_prepare (tracee);
1373
1374 /* These events are handled by tracing SIGSTOP signals sent
1375 to unknown tracees. Make sure not to pass through
1376 status, as there's no signal really being delivered. */
1377 ptrace (PTRACE_SYSCALL, pid, 0, 0);
1378 return -1;
1379
1380 #ifdef SIGSYS
1381 case SIGSYS:
1382 if (ptrace (PTRACE_GETSIGINFO, pid, 0, &siginfo))
1383 return -1;
1384
1385 /* Continue the process until the next syscall, but don't
1386 pass through the signal if an emulated syscall led to
1387 it. */
1388 #ifdef HAVE_SIGINFO_T_SI_SYSCALL
1389 #ifndef __arm__
1390 ptrace (PTRACE_SYSCALL, pid, 0, ((siginfo.si_code == SYS_SECCOMP
1391 && siginfo.si_syscall == -1)
1392 ? 0 : status));
1393 #else /* __arm__ */
1394 ptrace (PTRACE_SYSCALL, pid, 0, ((siginfo.si_code == SYS_SECCOMP
1395 && siginfo.si_syscall == 222)
1396 ? 0 : status));
1397 #endif /* !__arm__ */
1398 #else /* !HAVE_SIGINFO_T_SI_SYSCALL */
1399 /* Drop this signal, since what caused it is unknown. */
1400 ptrace (PTRACE_SYSCALL, pid, 0, 0);
1401 #endif /* HAVE_SIGINFO_T_SI_SYSCALL */
1402 return -1;
1403 #endif /* SIGSYS */
1404
1405 default:
1406 /* Continue the process until the next syscall. */
1407 ptrace (PTRACE_SYSCALL, pid, 0, status);
1408 return -1;
1409 }
1410 }
1411 else
1412 {
1413 /* The process has exited. Unlink the associated tracee. */
1414 tracee = find_tracee (pid);
1415
1416 if (tracee)
1417 remove_tracee (tracee);
1418
1419 return pid;
1420 }
1421 }
1422
1423
1424
1425 /* Initialize the exec library. LOADER should be the file name of the
1426 loader binary; it is not copied. */
1427
1428 void
1429 exec_init (const char *loader)
1430 {
1431 loader_name = loader;
1432 }