#ifndef _STP_UTRACE_H #define _STP_UTRACE_H #include #include #include #include /* * Event bits passed to utrace_set_events(). * These appear in &struct task_struct.@utrace_flags * and &struct utrace_engine.@flags. */ enum utrace_events { _UTRACE_EVENT_QUIESCE, /* Thread is available for examination. */ _UTRACE_EVENT_REAP, /* Zombie reaped, no more tracing possible. */ _UTRACE_EVENT_CLONE, /* Successful clone/fork/vfork just done. */ _UTRACE_EVENT_EXEC, /* Successful execve just completed. */ _UTRACE_EVENT_DEATH, /* Thread has died. */ _UTRACE_EVENT_SYSCALL_ENTRY, /* User entered kernel for system call. */ _UTRACE_EVENT_SYSCALL_EXIT, /* Returning to user after system call. */ _UTRACE_NEVENTS }; #define UTRACE_EVENT(type) (1UL << _UTRACE_EVENT_##type) /* * Both kinds of syscall events; these call the @report_syscall_entry() * and @report_syscall_exit() callbacks, respectively. */ #define UTRACE_EVENT_SYSCALL \ (UTRACE_EVENT(SYSCALL_ENTRY) | UTRACE_EVENT(SYSCALL_EXIT)) /* * The event reports triggered synchronously by task death. */ #define _UTRACE_DEATH_EVENTS (UTRACE_EVENT(DEATH) | UTRACE_EVENT(QUIESCE)) /* * Flags for utrace_attach_task(). */ #define UTRACE_ATTACH_MATCH_OPS 0x0001 /* Match engines on ops. */ #define UTRACE_ATTACH_MATCH_DATA 0x0002 /* Match engines on data. */ #define UTRACE_ATTACH_MATCH_MASK 0x000f #define UTRACE_ATTACH_CREATE 0x0010 /* Attach a new engine. */ #define UTRACE_ATTACH_EXCLUSIVE 0x0020 /* Refuse if existing match. */ /** * struct utrace_engine - per-engine structure * @ops: &struct utrace_engine_ops pointer passed to utrace_attach_task() * @data: engine-private &void * passed to utrace_attach_task() * @flags: event mask set by utrace_set_events() plus internal flag bits * * The task itself never has to worry about engines detaching while * it's doing event callbacks. These structures are removed from the * task's active list only when it's stopped, or by the task itself. * * utrace_engine_get() and utrace_engine_put() maintain a reference count. * When it drops to zero, the structure is freed. One reference is held * implicitly while the engine is attached to its task. */ struct utrace_engine { /* private: */ struct kref kref; void (*release)(void *); struct list_head entry; /* public: */ const struct utrace_engine_ops *ops; void *data; unsigned long flags; }; static int utrace_init(void); static int utrace_exit(void); static void utrace_shutdown(void); /** * utrace_engine_get - acquire a reference on a &struct utrace_engine * @engine: &struct utrace_engine pointer * * You must hold a reference on @engine, and you get another. */ static inline void utrace_engine_get(struct utrace_engine *engine) { kref_get(&engine->kref); } static void __utrace_engine_release(struct kref *); /** * utrace_engine_put - release a reference on a &struct utrace_engine * @engine: &struct utrace_engine pointer * * You must hold a reference on @engine, and you lose that reference. * If it was the last one, @engine becomes an invalid pointer. */ static inline void utrace_engine_put(struct utrace_engine *engine) { kref_put(&engine->kref, __utrace_engine_release); } /** * struct utrace_engine_ops - tracing engine callbacks * * Each @report_*() callback corresponds to an %UTRACE_EVENT(*) bit. * utrace_set_events() calls on @engine choose which callbacks will * be made to @engine from @task. * * Most callbacks take an @action argument, giving the resume action * chosen by other tracing engines. All callbacks take an @engine * argument. The @report_reap callback takes a @task argument that * might or might not be @current. All other @report_* callbacks * report an event in the @current task. * * For some calls, @action also includes bits specific to that event * and utrace_resume_action() is used to extract the resume action. * This shows what would happen if @engine wasn't there, or will if * the callback's return value uses %UTRACE_RESUME. This always * starts as %UTRACE_RESUME when no other tracing is being done on * this task. * * All return values contain &enum utrace_resume_action bits. For * some calls, other bits specific to that kind of event are added to * the resume action bits with OR. These are the same bits used in * the @action argument. The resume action returned by a callback * does not override previous engines' choices, it only says what * @engine wants done. What @current actually does is the action that's * most constrained among the choices made by all attached engines. * See utrace_control() for more information on the actions. * * When %UTRACE_STOP is used in @report_syscall_entry, then @current * stops before attempting the system call. In this case, another * @report_syscall_entry callback will follow after @current resumes * if %UTRACE_REPORT or %UTRACE_INTERRUPT was returned by some * callback or passed to utrace_control(). In a second or later * callback, %UTRACE_SYSCALL_RESUMED is set in the @action argument to * indicate a repeat callback still waiting to attempt the same system * call invocation. This repeat callback gives each engine an * opportunity to reexamine registers another engine might have * changed while @current was held in %UTRACE_STOP. * * In other cases, the resume action does not take effect until @current * is ready to check for signals and return to user mode. If there * are more callbacks to be made, the last round of calls determines * the final action. A @report_quiesce callback with @event zero * will always be the last one made before * @current resumes. Only %UTRACE_STOP is "sticky"--if @engine returned * %UTRACE_STOP then @current stays stopped unless @engine returns * different from a following callback. * * The report_death() and report_reap() callbacks do not take @action * arguments, and only %UTRACE_DETACH is meaningful in the return value * from a report_death() callback. None of the resume actions applies * to a dead thread. * * All @report_*() hooks are called with no locks held, in a generally * safe environment when we will be returning to user mode soon (or just * entered the kernel). It is fine to block for memory allocation and * the like, but all hooks are asynchronous and must not block on * external events! If you want the thread to block, use %UTRACE_STOP * in your hook's return value; then later wake it up with utrace_control(). * * @report_quiesce: * Requested by %UTRACE_EVENT(%QUIESCE). * This does not indicate any event, but just that @current is in a * safe place for examination. This call is made before each specific * event callback, except for @report_reap. The @event argument gives * the %UTRACE_EVENT(@which) value for the event occurring. This * callback might be made for events @engine has not requested, if * some other engine is tracing the event; calling utrace_set_events() * call here can request the immediate callback for this occurrence of * @event. @event is zero when there is no other event, @current is * now ready to check for signals and return to user mode, and some * engine has used %UTRACE_REPORT or %UTRACE_INTERRUPT to request * this callback. * * @report_clone: * Requested by %UTRACE_EVENT(%CLONE). * Event reported for parent, before the new task @child might run. * @clone_flags gives the flags used in the clone system call, or * equivalent flags for a fork() or vfork() system call. This * function can use utrace_attach_task() on @child. Then passing * %UTRACE_STOP to utrace_control() on @child here keeps the child * stopped before it ever runs in user mode, %UTRACE_REPORT or * %UTRACE_INTERRUPT ensures a callback from @child before it * starts in user mode. * * @report_exec: * Requested by %UTRACE_EVENT(%EXEC). * An execve system call has succeeded and the new program is about to * start running. The initial user register state is handy to be tweaked * directly in @regs. @fmt and @bprm gives the details of this exec. * * @report_syscall_entry: * Requested by %UTRACE_EVENT(%SYSCALL_ENTRY). * Thread has entered the kernel to request a system call. * The user register state is handy to be tweaked directly in @regs. * The @action argument contains an &enum utrace_syscall_action, * use utrace_syscall_action() to extract it. The return value * overrides the last engine's action for the system call. * If the final action is %UTRACE_SYSCALL_ABORT, no system call * is made. The details of the system call being attempted can * be fetched here with syscall_get_nr() and syscall_get_arguments(). * The parameter registers can be changed with syscall_set_arguments(). * See above about the %UTRACE_SYSCALL_RESUMED flag in @action. * Use %UTRACE_REPORT in the return value to guarantee you get * another callback (with %UTRACE_SYSCALL_RESUMED flag) in case * @current stops with %UTRACE_STOP before attempting the system call. * * @report_syscall_exit: * Requested by %UTRACE_EVENT(%SYSCALL_EXIT). * Thread is about to leave the kernel after a system call request. * The user register state is handy to be tweaked directly in @regs. * The results of the system call attempt can be examined here using * syscall_get_error() and syscall_get_return_value(). It is safe * here to call syscall_set_return_value() or syscall_rollback(). * * @report_death: * Requested by %UTRACE_EVENT(%DEATH). * Thread is really dead now. It might be reaped by its parent at * any time, or self-reap immediately. Though the actual reaping * may happen in parallel, a report_reap() callback will always be * ordered after a report_death() callback. * * @report_reap: * Requested by %UTRACE_EVENT(%REAP). * Called when someone reaps the dead task (parent, init, or self). * This means the parent called wait, or else this was a detached * thread or a process whose parent ignores SIGCHLD. * No more callbacks are made after this one. * The engine is always detached. * There is nothing more a tracing engine can do about this thread. * After this callback, the @engine pointer will become invalid. * The @task pointer may become invalid if get_task_struct() hasn't * been used to keep it alive. * An engine should always request this callback if it stores the * @engine pointer or stores any pointer in @engine->data, so it * can clean up its data structures. * Unlike other callbacks, this can be called from the parent's context * rather than from the traced thread itself--it must not delay the * parent by blocking. * * @release: * If not %NULL, this is called after the last utrace_engine_put() * call for a &struct utrace_engine, which could be implicit after * a %UTRACE_DETACH return from another callback. Its argument is * the engine's @data member. */ struct utrace_engine_ops { u32 (*report_quiesce)(u32 action, struct utrace_engine *engine, unsigned long event); u32 (*report_clone)(u32 action, struct utrace_engine *engine, unsigned long clone_flags, struct task_struct *child); u32 (*report_exec)(u32 action, struct utrace_engine *engine, const struct linux_binfmt *fmt, const struct linux_binprm *bprm, struct pt_regs *regs); u32 (*report_syscall_entry)(u32 action, struct utrace_engine *engine, struct pt_regs *regs); u32 (*report_syscall_exit)(u32 action, struct utrace_engine *engine, struct pt_regs *regs); u32 (*report_death)(struct utrace_engine *engine, bool group_dead, int signal); void (*report_reap)(struct utrace_engine *engine, struct task_struct *task); void (*release)(void *data); }; /* * These are the exported entry points for tracing engines to use. * See stp_utrace.c for their kerneldoc comments with interface details. */ static struct utrace_engine *utrace_attach_task(struct task_struct *, int, const struct utrace_engine_ops *, void *); static int __must_check utrace_set_events(struct task_struct *, struct utrace_engine *, unsigned long eventmask); static int __must_check utrace_barrier(struct task_struct *, struct utrace_engine *); /* * Version number of the API defined in this file. This will change * whenever a tracing engine's code would need some updates to keep * working. We maintain this here for the benefit of tracing engine code * that is developed concurrently with utrace API improvements before they * are merged into the kernel, making LINUX_VERSION_CODE checks unwieldy. */ #define UTRACE_API_VERSION 20110727 /** * enum utrace_resume_action - engine's choice of action for a traced task * @UTRACE_STOP: Stay quiescent after callbacks. * @UTRACE_INTERRUPT: Make quiesce callback soon. * @UTRACE_REPORT: Make some callback soon. * @UTRACE_RESUME: Resume normally in user mode. * @UTRACE_DETACH: Detach my engine (implies %UTRACE_RESUME). * * See utrace_control() for detailed descriptions of each action. This is * encoded in the @action argument and the return value for every callback * with a &u32 return value. * * The order of these is important. When there is more than one engine, * each supplies its choice and the smallest value prevails. */ enum utrace_resume_action { UTRACE_STOP, UTRACE_INTERRUPT, UTRACE_REPORT, UTRACE_RESUME, UTRACE_DETACH, UTRACE_RESUME_MAX }; #define UTRACE_RESUME_BITS (ilog2(UTRACE_RESUME_MAX) + 1) #define UTRACE_RESUME_MASK ((1 << UTRACE_RESUME_BITS) - 1) /** * utrace_resume_action - &enum utrace_resume_action from callback action * @action: &u32 callback @action argument or return value * * This extracts the &enum utrace_resume_action from @action, * which is the @action argument to a &struct utrace_engine_ops * callback or the return value from one. */ static inline enum utrace_resume_action utrace_resume_action(u32 action) { return action & UTRACE_RESUME_MASK; } #endif /* _STP_UTRACE_H */