rustix/thread/
prctl.rs

1//! Linux `prctl` wrappers.
2//!
3//! Rustix wraps variadic/dynamic-dispatch functions like `prctl` in type-safe
4//! wrappers.
5//!
6//! # Safety
7//!
8//! The inner `prctl` calls are dynamically typed and must be called correctly.
9#![allow(unsafe_code)]
10
11use core::mem::MaybeUninit;
12use core::num::NonZeroU64;
13use core::ptr;
14use core::ptr::NonNull;
15use core::sync::atomic::AtomicU8;
16
17use bitflags::bitflags;
18
19use crate::backend::prctl::syscalls;
20#[cfg(feature = "alloc")]
21use crate::ffi::CString;
22use crate::ffi::{c_int, c_uint, c_void, CStr};
23use crate::io;
24use crate::io::Errno;
25use crate::pid::Pid;
26#[cfg(linux_raw_dep)]
27use crate::prctl::PointerAuthenticationKeys;
28use crate::prctl::{prctl_1arg, prctl_2args, prctl_3args, prctl_get_at_arg2_optional};
29use crate::utils::as_ptr;
30
31use super::CapabilitySet;
32
33//
34// PR_GET_KEEPCAPS/PR_SET_KEEPCAPS
35//
36
37const PR_GET_KEEPCAPS: c_int = 7;
38
39/// Get the current state of the calling thread's `keep capabilities` flag.
40///
41/// # References
42///  - [`prctl(PR_GET_KEEPCAPS,…)`]
43///
44/// [`prctl(PR_GET_KEEPCAPS,…)`]: https://man7.org/linux/man-pages/man2/prctl.2.html
45#[inline]
46pub fn get_keep_capabilities() -> io::Result<bool> {
47    unsafe { prctl_1arg(PR_GET_KEEPCAPS) }.map(|r| r != 0)
48}
49
50const PR_SET_KEEPCAPS: c_int = 8;
51
52/// Set the state of the calling thread's `keep capabilities` flag.
53///
54/// # References
55///  - [`prctl(PR_SET_KEEPCAPS,…)`]
56///
57/// [`prctl(PR_SET_KEEPCAPS,…)`]: https://man7.org/linux/man-pages/man2/prctl.2.html
58#[inline]
59pub fn set_keep_capabilities(enable: bool) -> io::Result<()> {
60    unsafe { prctl_2args(PR_SET_KEEPCAPS, usize::from(enable) as *mut _) }.map(|_r| ())
61}
62
63//
64// PR_GET_NAME/PR_SET_NAME
65//
66
#[cfg(feature = "alloc")]
const PR_GET_NAME: c_int = 16;

/// Get the name of the calling thread.
///
/// The name is read into a 16-byte buffer and returned up to (but not
/// including) the first NUL byte.
///
/// # Errors
///
/// Returns the error reported by `prctl`, or [`io::Errno::ILSEQ`] if the
/// bytes read cannot be converted into a `CString`.
///
/// # References
///  - [`prctl(PR_GET_NAME,…)`]
///
/// [`prctl(PR_GET_NAME,…)`]: https://man7.org/linux/man-pages/man2/prctl.2.html
#[inline]
#[cfg(feature = "alloc")]
#[cfg_attr(docsrs, doc(cfg(feature = "alloc")))]
pub fn name() -> io::Result<CString> {
    let mut buffer = [0_u8; 16];
    unsafe { prctl_2args(PR_GET_NAME, buffer.as_mut_ptr().cast())? };

    // The name is expected to be NUL-terminated. If no terminator is found,
    // keep every byte that was written rather than silently reporting an
    // empty name.
    let len = buffer
        .iter()
        .position(|&x| x == 0_u8)
        .unwrap_or(buffer.len());
    CString::new(&buffer[..len]).map_err(|_r| io::Errno::ILSEQ)
}
86
87const PR_SET_NAME: c_int = 15;
88
89/// Set the name of the calling thread.
90///
91/// Unlike `pthread_setname_np`, this function silently truncates the name to
92/// 16 bytes, as the Linux syscall does.
93///
94/// # References
95///  - [`prctl(PR_SET_NAME,…)`]
96///
97/// [`prctl(PR_SET_NAME,…)`]: https://man7.org/linux/man-pages/man2/prctl.2.html
98#[inline]
99pub fn set_name(name: &CStr) -> io::Result<()> {
100    unsafe { prctl_2args(PR_SET_NAME, name.as_ptr() as *mut _) }.map(|_r| ())
101}
102
103//
104// PR_GET_SECCOMP/PR_SET_SECCOMP
105//
106
const PR_GET_SECCOMP: c_int = 21;

const SECCOMP_MODE_DISABLED: i32 = 0;
const SECCOMP_MODE_STRICT: i32 = 1;
const SECCOMP_MODE_FILTER: i32 = 2;

/// `SECCOMP_MODE_*`
///
/// Each variant's discriminant is the matching `SECCOMP_MODE_*` value.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
#[repr(i32)]
pub enum SecureComputingMode {
    /// Secure computing is switched off.
    Disabled = SECCOMP_MODE_DISABLED,
    /// The hard-coded filter is in effect.
    Strict = SECCOMP_MODE_STRICT,
    /// A user-supplied filter is in effect.
    Filter = SECCOMP_MODE_FILTER,
}
124
125impl TryFrom<i32> for SecureComputingMode {
126    type Error = io::Errno;
127
128    fn try_from(value: i32) -> Result<Self, Self::Error> {
129        match value {
130            SECCOMP_MODE_DISABLED => Ok(Self::Disabled),
131            SECCOMP_MODE_STRICT => Ok(Self::Strict),
132            SECCOMP_MODE_FILTER => Ok(Self::Filter),
133            _ => Err(io::Errno::RANGE),
134        }
135    }
136}
137
138/// Get the secure computing mode of the calling thread.
139///
140/// If the caller is not in secure computing mode, this returns
141/// [`SecureComputingMode::Disabled`]. If the caller is in strict secure
142/// computing mode, then this call will cause a [`Signal::KILL`] signal to be
143/// sent to the process. If the caller is in filter mode, and this system call
144/// is allowed by the seccomp filters, it returns
145/// [`SecureComputingMode::Filter`]; otherwise, the process is killed with a
146/// [`Signal::KILL`] signal.
147///
148/// Since Linux 3.8, the Seccomp field of the `/proc/[pid]/status` file
149/// provides a method of obtaining the same information, without the risk that
150/// the process is killed; see [the `proc` manual page].
151///
152/// # References
153///  - [`prctl(PR_GET_SECCOMP,…)`]
154///
155/// [`Signal::KILL`]: crate::signal::Signal::KILL
156/// [`prctl(PR_GET_SECCOMP,…)`]: https://man7.org/linux/man-pages/man2/prctl.2.html
157/// [the `proc` manual page]: https://man7.org/linux/man-pages/man5/proc.5.html
158#[inline]
159pub fn secure_computing_mode() -> io::Result<SecureComputingMode> {
160    unsafe { prctl_1arg(PR_GET_SECCOMP) }.and_then(TryInto::try_into)
161}
162
163const PR_SET_SECCOMP: c_int = 22;
164
165/// Set the secure computing mode for the calling thread, to limit the
166/// available system calls.
167///
168/// # References
169///  - [`prctl(PR_SET_SECCOMP,…)`]
170///
171/// [`prctl(PR_SET_SECCOMP,…)`]: https://man7.org/linux/man-pages/man2/prctl.2.html
172#[inline]
173pub fn set_secure_computing_mode(mode: SecureComputingMode) -> io::Result<()> {
174    unsafe { prctl_2args(PR_SET_SECCOMP, mode as usize as *mut _) }.map(|_r| ())
175}
176
177//
178// PR_CAPBSET_READ/PR_CAPBSET_DROP
179//
180
// `prctl` option number for `PR_CAPBSET_READ`.
const PR_CAPBSET_READ: c_int = 23;

/// Linux per-thread capability.
///
/// Each variant's discriminant is the corresponding `CAP_*` constant from
/// `linux_raw_sys::general`.
#[deprecated(since = "1.1.0", note = "Use CapabilitySet with a single bit instead")]
#[derive(Copy, Clone, Debug, Eq, PartialEq)]
#[repr(u32)]
#[non_exhaustive]
pub enum Capability {
    /// In a system with the `_POSIX_CHOWN_RESTRICTED` option defined, this
    /// overrides the restriction of changing file ownership and group
    /// ownership.
    ChangeOwnership = linux_raw_sys::general::CAP_CHOWN,
    /// Override all DAC access, including ACL execute access if `_POSIX_ACL`
    /// is defined. Excluding DAC access covered by
    /// [`Capability::LinuxImmutable`].
    DACOverride = linux_raw_sys::general::CAP_DAC_OVERRIDE,
    /// Overrides all DAC restrictions regarding read and search on files and
    /// directories, including ACL restrictions if `_POSIX_ACL` is defined.
    /// Excluding DAC access covered by [`Capability::LinuxImmutable`].
    DACReadSearch = linux_raw_sys::general::CAP_DAC_READ_SEARCH,
    /// Overrides all restrictions about allowed operations on files, where
    /// file owner ID must be equal to the user ID, except where
    /// [`Capability::FileSetID`] is applicable. It doesn't override MAC and
    /// DAC restrictions.
    FileOwner = linux_raw_sys::general::CAP_FOWNER,
    /// Overrides the following restrictions that the effective user ID shall
    /// match the file owner ID when setting the `S_ISUID` and `S_ISGID` bits
    /// on that file; that the effective group ID (or one of the supplementary
    /// group IDs) shall match the file owner ID when setting the `S_ISGID` bit
    /// on that file; that the `S_ISUID` and `S_ISGID` bits are cleared on
    /// successful return from `chown` (not implemented).
    FileSetID = linux_raw_sys::general::CAP_FSETID,
    /// Overrides the restriction that the real or effective user ID of a
    /// process sending a signal must match the real or effective user ID of
    /// the process receiving the signal.
    Kill = linux_raw_sys::general::CAP_KILL,
    /// Allows `setgid` manipulation. Allows `setgroups`. Allows forged gids on
    /// socket credentials passing.
    SetGroupID = linux_raw_sys::general::CAP_SETGID,
    /// Allows `set*uid` manipulation (including fsuid). Allows forged pids on
    /// socket credentials passing.
    SetUserID = linux_raw_sys::general::CAP_SETUID,
    /// Without VFS support for capabilities:
    ///  - Transfer any capability in your permitted set to any pid.
    ///  - Remove any capability in your permitted set from any pid.
    ///
    /// With VFS support for capabilities (neither of the above, but):
    ///  - Add any capability from current's capability bounding set to the
    ///    current process' inheritable set.
    ///  - Allow taking bits out of capability bounding set.
    ///  - Allow modification of the securebits for a process.
    SetPermittedCapabilities = linux_raw_sys::general::CAP_SETPCAP,
    /// Allow modification of `S_IMMUTABLE` and `S_APPEND` file attributes.
    LinuxImmutable = linux_raw_sys::general::CAP_LINUX_IMMUTABLE,
    /// Allows binding to TCP/UDP sockets below 1024. Allows binding to ATM
    /// VCIs below 32.
    NetBindService = linux_raw_sys::general::CAP_NET_BIND_SERVICE,
    /// Allow broadcasting, listen to multicast.
    NetBroadcast = linux_raw_sys::general::CAP_NET_BROADCAST,
    /// Allow interface configuration. Allow administration of IP firewall,
    /// masquerading and accounting. Allow setting debug option on sockets.
    /// Allow modification of routing tables. Allow setting arbitrary
    /// process / process group ownership on sockets. Allow binding to any
    /// address for transparent proxying (also via [`Capability::NetRaw`]).
    /// Allow setting TOS (type of service). Allow setting promiscuous
    /// mode. Allow clearing driver statistics. Allow multicasting. Allow
    /// read/write of device-specific registers. Allow activation of ATM
    /// control sockets.
    NetAdmin = linux_raw_sys::general::CAP_NET_ADMIN,
    /// Allow use of `RAW` sockets. Allow use of `PACKET` sockets. Allow
    /// binding to any address for transparent proxying (also via
    /// [`Capability::NetAdmin`]).
    NetRaw = linux_raw_sys::general::CAP_NET_RAW,
    /// Allow locking of shared memory segments. Allow mlock and mlockall
    /// (which doesn't really have anything to do with IPC).
    IPCLock = linux_raw_sys::general::CAP_IPC_LOCK,
    /// Override IPC ownership checks.
    IPCOwner = linux_raw_sys::general::CAP_IPC_OWNER,
    /// Insert and remove kernel modules - modify kernel without limit.
    SystemModule = linux_raw_sys::general::CAP_SYS_MODULE,
    /// Allow ioperm/iopl access. Allow sending USB messages to any device via
    /// `/dev/bus/usb`.
    SystemRawIO = linux_raw_sys::general::CAP_SYS_RAWIO,
    /// Allow use of `chroot`.
    SystemChangeRoot = linux_raw_sys::general::CAP_SYS_CHROOT,
    /// Allow `ptrace` of any process.
    SystemProcessTrace = linux_raw_sys::general::CAP_SYS_PTRACE,
    /// Allow configuration of process accounting.
    SystemProcessAccounting = linux_raw_sys::general::CAP_SYS_PACCT,
    /// Allow configuration of the secure attention key. Allow administration
    /// of the random device. Allow examination and configuration of disk
    /// quotas. Allow setting the domainname. Allow setting the hostname.
    /// Allow `mount` and `umount`, setting up new smb connection.
    /// Allow some autofs root ioctls. Allow nfsservctl. Allow
    /// `VM86_REQUEST_IRQ`. Allow to read/write pci config on alpha. Allow
    /// `irix_prctl` on mips (setstacksize). Allow flushing all cache on
    /// m68k (`sys_cacheflush`). Allow removing semaphores. Used instead of
    /// [`Capability::ChangeOwnership`] to "chown" IPC message queues,
    /// semaphores and shared memory. Allow locking/unlocking of shared
    /// memory segment. Allow turning swap on/off. Allow forged pids on
    /// socket credentials passing. Allow setting readahead and
    /// flushing buffers on block devices. Allow setting geometry in floppy
    /// driver. Allow turning DMA on/off in `xd` driver. Allow
    /// administration of md devices (mostly the above, but some
    /// extra ioctls). Allow tuning the ide driver. Allow access to the nvram
    /// device. Allow administration of `apm_bios`, serial and bttv (TV)
    /// device. Allow manufacturer commands in isdn CAPI support driver.
    /// Allow reading non-standardized portions of pci configuration space.
    /// Allow DDI debug ioctl on sbpcd driver. Allow setting up serial ports.
    /// Allow sending raw qic-117 commands. Allow enabling/disabling tagged
    /// queuing on SCSI controllers and sending arbitrary SCSI commands.
    /// Allow setting encryption key on loopback filesystem. Allow setting
    /// zone reclaim policy. Allow everything under
    /// [`Capability::BerkeleyPacketFilters`] and
    /// [`Capability::PerformanceMonitoring`] for backward compatibility.
    SystemAdmin = linux_raw_sys::general::CAP_SYS_ADMIN,
    /// Allow use of `reboot`.
    SystemBoot = linux_raw_sys::general::CAP_SYS_BOOT,
    /// Allow raising priority and setting priority on other (different UID)
    /// processes. Allow use of FIFO and round-robin (realtime) scheduling
    /// on own processes and setting the scheduling algorithm used by
    /// another process. Allow setting cpu affinity on other processes.
    /// Allow setting realtime ioprio class. Allow setting ioprio class on
    /// other processes.
    SystemNice = linux_raw_sys::general::CAP_SYS_NICE,
    /// Override resource limits. Set resource limits. Override quota limits.
    /// Override reserved space on ext2 filesystem. Modify data journaling
    /// mode on ext3 filesystem (uses journaling resources). NOTE: ext2
    /// honors fsuid when checking for resource overrides, so you can
    /// override using fsuid too. Override size restrictions on IPC message
    /// queues. Allow more than 64hz interrupts from the real-time clock.
    /// Override max number of consoles on console allocation. Override max
    /// number of keymaps. Control memory reclaim behavior.
    SystemResource = linux_raw_sys::general::CAP_SYS_RESOURCE,
    /// Allow manipulation of system clock. Allow `irix_stime` on mips. Allow
    /// setting the real-time clock.
    SystemTime = linux_raw_sys::general::CAP_SYS_TIME,
    /// Allow configuration of tty devices. Allow `vhangup` of tty.
    SystemTTYConfig = linux_raw_sys::general::CAP_SYS_TTY_CONFIG,
    /// Allow the privileged aspects of `mknod`.
    MakeNode = linux_raw_sys::general::CAP_MKNOD,
    /// Allow taking of leases on files.
    Lease = linux_raw_sys::general::CAP_LEASE,
    /// Allow writing the audit log via unicast netlink socket.
    AuditWrite = linux_raw_sys::general::CAP_AUDIT_WRITE,
    /// Allow configuration of audit via unicast netlink socket.
    AuditControl = linux_raw_sys::general::CAP_AUDIT_CONTROL,
    /// Set or remove capabilities on files. Map `uid=0` into a child user
    /// namespace.
    SetFileCapabilities = linux_raw_sys::general::CAP_SETFCAP,
    /// Override MAC access. The base kernel enforces no MAC policy. An LSM may
    /// enforce a MAC policy, and if it does and it chooses to implement
    /// capability based overrides of that policy, this is the capability it
    /// should use to do so.
    MACOverride = linux_raw_sys::general::CAP_MAC_OVERRIDE,
    /// Allow MAC configuration or state changes. The base kernel requires no
    /// MAC configuration. An LSM may enforce a MAC policy, and if it does and
    /// it chooses to implement capability based checks on modifications to
    /// that policy or the data required to maintain it, this is the capability
    /// it should use to do so.
    MACAdmin = linux_raw_sys::general::CAP_MAC_ADMIN,
    /// Allow configuring the kernel's `syslog` (`printk` behaviour).
    SystemLog = linux_raw_sys::general::CAP_SYSLOG,
    /// Allow triggering something that will wake the system.
    WakeAlarm = linux_raw_sys::general::CAP_WAKE_ALARM,
    /// Allow preventing system suspends.
    BlockSuspend = linux_raw_sys::general::CAP_BLOCK_SUSPEND,
    /// Allow reading the audit log via multicast netlink socket.
    AuditRead = linux_raw_sys::general::CAP_AUDIT_READ,
    /// Allow system performance and observability privileged operations using
    /// `perf_events`, `i915_perf` and other kernel subsystems.
    PerformanceMonitoring = linux_raw_sys::general::CAP_PERFMON,
    /// This capability allows the following BPF operations:
    ///  - Creating all types of BPF maps
    ///  - Advanced verifier features
    ///     - Indirect variable access
    ///     - Bounded loops
    ///     - BPF to BPF function calls
    ///     - Scalar precision tracking
    ///     - Larger complexity limits
    ///     - Dead code elimination
    ///     - And potentially other features
    ///  - Loading BPF Type Format (BTF) data
    ///  - Retrieve `xlated` and JITed code of BPF programs
    ///  - Use `bpf_spin_lock` helper
    ///
    /// [`Capability::PerformanceMonitoring`] relaxes the verifier checks
    /// further:
    ///  - BPF programs can make use of pointer-to-integer conversions
    ///  - speculation attack hardening measures are bypassed
    ///  - `bpf_probe_read` to read arbitrary kernel memory is allowed
    ///  - `bpf_trace_printk` to print kernel memory is allowed
    ///
    /// [`Capability::SystemAdmin`] is required to use `bpf_probe_write_user`.
    ///
    /// [`Capability::SystemAdmin`] is required to iterate system-wide loaded
    /// programs, maps, links, and BTFs, and convert their IDs to file
    /// descriptors.
    ///
    /// [`Capability::PerformanceMonitoring`] and
    /// [`Capability::BerkeleyPacketFilters`] are required to load tracing
    /// programs. [`Capability::NetAdmin`] and
    /// [`Capability::BerkeleyPacketFilters`] are required to load
    /// networking programs.
    BerkeleyPacketFilters = linux_raw_sys::general::CAP_BPF,
    /// Allow checkpoint/restore related operations. Allow PID selection during
    /// `clone3`. Allow writing to `ns_last_pid`.
    CheckpointRestore = linux_raw_sys::general::CAP_CHECKPOINT_RESTORE,
}
389
390mod private {
391    pub trait Sealed {}
392    pub struct Token;
393
394    #[allow(deprecated)]
395    impl Sealed for crate::thread::Capability {}
396    impl Sealed for crate::thread::CapabilitySet {}
397}
398/// Compatibility trait to keep existing code that uses the deprecated [`Capability`] type working.
399///
400/// This trait and its methods are sealed. It must not be used downstream.
401pub trait CompatCapability: private::Sealed + Copy {
402    #[doc(hidden)]
403    fn as_capability_set(self, _: private::Token) -> CapabilitySet;
404}
405#[allow(deprecated)]
406impl CompatCapability for Capability {
407    fn as_capability_set(self, _: private::Token) -> CapabilitySet {
408        match self {
409            Self::ChangeOwnership => CapabilitySet::CHOWN,
410            Self::DACOverride => CapabilitySet::DAC_OVERRIDE,
411            Self::DACReadSearch => CapabilitySet::DAC_READ_SEARCH,
412            Self::FileOwner => CapabilitySet::FOWNER,
413            Self::FileSetID => CapabilitySet::FSETID,
414            Self::Kill => CapabilitySet::KILL,
415            Self::SetGroupID => CapabilitySet::SETGID,
416            Self::SetUserID => CapabilitySet::SETUID,
417            Self::SetPermittedCapabilities => CapabilitySet::SETPCAP,
418            Self::LinuxImmutable => CapabilitySet::LINUX_IMMUTABLE,
419            Self::NetBindService => CapabilitySet::NET_BIND_SERVICE,
420            Self::NetBroadcast => CapabilitySet::NET_BROADCAST,
421            Self::NetAdmin => CapabilitySet::NET_ADMIN,
422            Self::NetRaw => CapabilitySet::NET_RAW,
423            Self::IPCLock => CapabilitySet::IPC_LOCK,
424            Self::IPCOwner => CapabilitySet::IPC_OWNER,
425            Self::SystemModule => CapabilitySet::SYS_MODULE,
426            Self::SystemRawIO => CapabilitySet::SYS_RAWIO,
427            Self::SystemChangeRoot => CapabilitySet::SYS_CHROOT,
428            Self::SystemProcessTrace => CapabilitySet::SYS_PTRACE,
429            Self::SystemProcessAccounting => CapabilitySet::SYS_PACCT,
430            Self::SystemAdmin => CapabilitySet::SYS_ADMIN,
431            Self::SystemBoot => CapabilitySet::SYS_BOOT,
432            Self::SystemNice => CapabilitySet::SYS_NICE,
433            Self::SystemResource => CapabilitySet::SYS_RESOURCE,
434            Self::SystemTime => CapabilitySet::SYS_TIME,
435            Self::SystemTTYConfig => CapabilitySet::SYS_TTY_CONFIG,
436            Self::MakeNode => CapabilitySet::MKNOD,
437            Self::Lease => CapabilitySet::LEASE,
438            Self::AuditWrite => CapabilitySet::AUDIT_WRITE,
439            Self::AuditControl => CapabilitySet::AUDIT_CONTROL,
440            Self::SetFileCapabilities => CapabilitySet::SETFCAP,
441            Self::MACOverride => CapabilitySet::MAC_OVERRIDE,
442            Self::MACAdmin => CapabilitySet::MAC_ADMIN,
443            Self::SystemLog => CapabilitySet::SYSLOG,
444            Self::WakeAlarm => CapabilitySet::WAKE_ALARM,
445            Self::BlockSuspend => CapabilitySet::BLOCK_SUSPEND,
446            Self::AuditRead => CapabilitySet::AUDIT_READ,
447            Self::PerformanceMonitoring => CapabilitySet::PERFMON,
448            Self::BerkeleyPacketFilters => CapabilitySet::BPF,
449            Self::CheckpointRestore => CapabilitySet::CHECKPOINT_RESTORE,
450        }
451    }
452}
453impl CompatCapability for CapabilitySet {
454    fn as_capability_set(self, _: private::Token) -> CapabilitySet {
455        self
456    }
457}
458
459/// Check if the specified capability is in the calling thread's capability
460/// bounding set.
461///
462/// # References
463///  - [`prctl(PR_CAPBSET_READ,…)`]
464///
465/// [`prctl(PR_CAPBSET_READ,…)`]: https://man7.org/linux/man-pages/man2/prctl.2.html
466#[inline]
467pub fn capability_is_in_bounding_set(capability: impl CompatCapability) -> io::Result<bool> {
468    let capset = capability.as_capability_set(private::Token).bits();
469    if capset.count_ones() != 1 {
470        return Err(Errno::INVAL);
471    }
472    let cap = capset.trailing_zeros();
473
474    // as *mut _ should be ptr::without_provenance_mut but our MSRV does not allow it.
475    unsafe { prctl_2args(PR_CAPBSET_READ, cap as usize as *mut _) }.map(|r| r != 0)
476}
477
478const PR_CAPBSET_DROP: c_int = 24;
479
480/// If the calling thread has the [`Capability::SetPermittedCapabilities`]
481/// capability within its user namespace, then drop the specified capability
482/// from the thread's capability bounding set.
483///
484/// # References
485///  - [`prctl(PR_CAPBSET_DROP,…)`]
486///
487/// [`prctl(PR_CAPBSET_DROP,…)`]: https://man7.org/linux/man-pages/man2/prctl.2.html
488#[inline]
489pub fn remove_capability_from_bounding_set(capability: impl CompatCapability) -> io::Result<()> {
490    let capset = capability.as_capability_set(private::Token).bits();
491    if capset.count_ones() != 1 {
492        return Err(Errno::INVAL);
493    }
494    let cap = capset.trailing_zeros();
495
496    // as *mut _ should be ptr::without_provenance_mut but our MSRV does not allow it.
497    unsafe { prctl_2args(PR_CAPBSET_DROP, cap as usize as *mut _) }.map(|_r| ())
498}
499
500//
501// PR_GET_SECUREBITS/PR_SET_SECUREBITS
502//
503
504const PR_GET_SECUREBITS: c_int = 27;
505
506bitflags! {
507    /// `SECBIT_*`
508    #[repr(transparent)]
509    #[derive(Copy, Clone, Eq, PartialEq, Hash, Debug)]
510    pub struct CapabilitiesSecureBits: u32 {
511        /// If this bit is set, then the kernel does not grant capabilities
512        /// when a `set-user-ID-root` program is executed, or when a process
513        /// with an effective or real UID of [`Uid::ROOT`] calls `execve`.
514        const NO_ROOT = 1_u32 << 0;
515        /// Set [`NO_ROOT`] irreversibly.
516        ///
517        /// [`NO_ROOT`]: Self::NO_ROOT
518        const NO_ROOT_LOCKED = 1_u32 << 1;
519        /// Setting this flag stops the kernel from adjusting the process'
520        /// permitted, effective, and ambient capability sets when the thread's
521        /// effective and filesystem UIDs are switched between zero and nonzero
522        /// values.
523        const NO_SETUID_FIXUP = 1_u32 << 2;
524        /// Set [`NO_SETUID_FIXUP`] irreversibly.
525        ///
526        /// [`NO_SETUID_FIXUP`]: Self::NO_SETUID_FIXUP
527        const NO_SETUID_FIXUP_LOCKED = 1_u32 << 3;
528        /// Setting this flag allows a thread that has one or more 0 UIDs to
529        /// retain capabilities in its permitted set when it switches all of
530        /// its UIDs to nonzero values.
531        const KEEP_CAPS = 1_u32 << 4;
532        /// Set [`KEEP_CAPS`] irreversibly.
533        ///
534        /// [`KEEP_CAPS`]: Self::KEEP_CAPS
535        const KEEP_CAPS_LOCKED = 1_u32 << 5;
536        /// Setting this flag disallows raising ambient capabilities via the
537        /// `prctl`'s `PR_CAP_AMBIENT_RAISE` operation.
538        const NO_CAP_AMBIENT_RAISE = 1_u32 << 6;
539        /// Set [`NO_CAP_AMBIENT_RAISE`] irreversibly.
540        ///
541        /// [`NO_CAP_AMBIENT_RAISE`]: Self::NO_CAP_AMBIENT_RAISE
542        const NO_CAP_AMBIENT_RAISE_LOCKED = 1_u32 << 7;
543
544        /// <https://docs.rs/bitflags/*/bitflags/#externally-defined-flags>
545        const _ = !0;
546    }
547}
548
549/// Get the `securebits` flags of the calling thread.
550///
551/// # References
552///  - [`prctl(PR_GET_SECUREBITS,…)`]
553///
554/// [`prctl(PR_GET_SECUREBITS,…)`]: https://man7.org/linux/man-pages/man2/prctl.2.html
555#[inline]
556pub fn capabilities_secure_bits() -> io::Result<CapabilitiesSecureBits> {
557    let r = unsafe { prctl_1arg(PR_GET_SECUREBITS)? } as c_uint;
558    CapabilitiesSecureBits::from_bits(r).ok_or(io::Errno::RANGE)
559}
560
561const PR_SET_SECUREBITS: c_int = 28;
562
563/// Set the `securebits` flags of the calling thread.
564///
565/// # References
566///  - [`prctl(PR_SET_SECUREBITS,…)`]
567///
568/// [`prctl(PR_SET_SECUREBITS,…)`]: https://man7.org/linux/man-pages/man2/prctl.2.html
569#[inline]
570pub fn set_capabilities_secure_bits(bits: CapabilitiesSecureBits) -> io::Result<()> {
571    unsafe { prctl_2args(PR_SET_SECUREBITS, bits.bits() as usize as *mut _) }.map(|_r| ())
572}
573
574//
575// PR_GET_TIMERSLACK/PR_SET_TIMERSLACK
576//
577
578const PR_GET_TIMERSLACK: c_int = 30;
579
580/// Get the `current` timer slack value of the calling thread.
581///
582/// # References
583///  - [`prctl(PR_GET_TIMERSLACK,…)`]
584///
585/// [`prctl(PR_GET_TIMERSLACK,…)`]: https://man7.org/linux/man-pages/man2/prctl.2.html
586#[inline]
587pub fn current_timer_slack() -> io::Result<u64> {
588    unsafe { prctl_1arg(PR_GET_TIMERSLACK) }.map(|r| r as u64)
589}
590
591const PR_SET_TIMERSLACK: c_int = 29;
592
593/// Sets the `current` timer slack value for the calling thread.
594///
595/// # References
596///  - [`prctl(PR_SET_TIMERSLACK,…)`]
597///
598/// [`prctl(PR_SET_TIMERSLACK,…)`]: https://man7.org/linux/man-pages/man2/prctl.2.html
599#[inline]
600pub fn set_current_timer_slack(value: Option<NonZeroU64>) -> io::Result<()> {
601    let value = usize::try_from(value.map_or(0, NonZeroU64::get)).map_err(|_r| io::Errno::RANGE)?;
602    unsafe { prctl_2args(PR_SET_TIMERSLACK, value as *mut _) }.map(|_r| ())
603}
604
605//
606// PR_GET_NO_NEW_PRIVS/PR_SET_NO_NEW_PRIVS
607//
608
609const PR_GET_NO_NEW_PRIVS: c_int = 39;
610
611/// Get the value of the `no_new_privs` attribute for the calling thread.
612///
613/// # References
614///  - [`prctl(PR_GET_NO_NEW_PRIVS,…)`]
615///
616/// [`prctl(PR_GET_NO_NEW_PRIVS,…)`]: https://man7.org/linux/man-pages/man2/prctl.2.html
617#[inline]
618pub fn no_new_privs() -> io::Result<bool> {
619    unsafe { prctl_1arg(PR_GET_NO_NEW_PRIVS) }.map(|r| r != 0)
620}
621
622const PR_SET_NO_NEW_PRIVS: c_int = 38;
623
624/// Set the calling thread's `no_new_privs` attribute.
625///
626/// # References
627///  - [`prctl(PR_SET_NO_NEW_PRIVS,…)`]
628///
629/// [`prctl(PR_SET_NO_NEW_PRIVS,…)`]: https://man7.org/linux/man-pages/man2/prctl.2.html
630#[inline]
631pub fn set_no_new_privs(no_new_privs: bool) -> io::Result<()> {
632    unsafe { prctl_2args(PR_SET_NO_NEW_PRIVS, usize::from(no_new_privs) as *mut _) }.map(|_r| ())
633}
634
635//
636// PR_GET_TID_ADDRESS
637//
638
639const PR_GET_TID_ADDRESS: c_int = 40;
640
641/// Get the `clear_child_tid` address set by `set_tid_address`
642/// and `clone`'s `CLONE_CHILD_CLEARTID` flag.
643///
644/// # References
645///  - [`prctl(PR_GET_TID_ADDRESS,…)`]
646///
647/// [`prctl(PR_GET_TID_ADDRESS,…)`]: https://man7.org/linux/man-pages/man2/prctl.2.html
648#[inline]
649pub fn get_clear_child_tid_address() -> io::Result<Option<NonNull<c_void>>> {
650    unsafe { prctl_get_at_arg2_optional::<*mut c_void>(PR_GET_TID_ADDRESS) }.map(NonNull::new)
651}
652
653//
654// PR_GET_THP_DISABLE/PR_SET_THP_DISABLE
655//
656
657const PR_GET_THP_DISABLE: c_int = 42;
658
659/// Get the current setting of the `THP disable` flag for the calling thread.
660///
661/// # References
662///  - [`prctl(PR_GET_THP_DISABLE,…)`]
663///
664/// [`prctl(PR_GET_THP_DISABLE,…)`]: https://man7.org/linux/man-pages/man2/prctl.2.html
665#[inline]
666pub fn transparent_huge_pages_are_disabled() -> io::Result<bool> {
667    unsafe { prctl_1arg(PR_GET_THP_DISABLE) }.map(|r| r != 0)
668}
669
670const PR_SET_THP_DISABLE: c_int = 41;
671
672/// Set the state of the `THP disable` flag for the calling thread.
673///
674/// # References
675///  - [`prctl(PR_SET_THP_DISABLE,…)`]
676///
677/// [`prctl(PR_SET_THP_DISABLE,…)`]: https://man7.org/linux/man-pages/man2/prctl.2.html
678#[inline]
679pub fn disable_transparent_huge_pages(thp_disable: bool) -> io::Result<()> {
680    unsafe { prctl_2args(PR_SET_THP_DISABLE, usize::from(thp_disable) as *mut _) }.map(|_r| ())
681}
682
683//
684// PR_CAP_AMBIENT
685//
686
687const PR_CAP_AMBIENT: c_int = 47;
688
689const PR_CAP_AMBIENT_IS_SET: usize = 1;
690
691/// Check if the specified capability is in the ambient set.
692///
693/// # References
694///  - [`prctl(PR_CAP_AMBIENT,PR_CAP_AMBIENT_IS_SET,…)`]
695///
696/// [`prctl(PR_CAP_AMBIENT,PR_CAP_AMBIENT_IS_SET,…)`]: https://man7.org/linux/man-pages/man2/prctl.2.html
697#[inline]
698pub fn capability_is_in_ambient_set(capability: impl CompatCapability) -> io::Result<bool> {
699    let capset = capability.as_capability_set(private::Token).bits();
700    if capset.count_ones() != 1 {
701        return Err(Errno::INVAL);
702    }
703    let cap = capset.trailing_zeros();
704
705    unsafe {
706        prctl_3args(
707            PR_CAP_AMBIENT,
708            PR_CAP_AMBIENT_IS_SET as *mut _,
709            // as *mut _ should be ptr::without_provenance_mut but our MSRV does not allow it.
710            cap as usize as *mut _,
711        )
712    }
713    .map(|r| r != 0)
714}
715
716const PR_CAP_AMBIENT_CLEAR_ALL: usize = 4;
717
718/// Remove all capabilities from the ambient set.
719///
720/// # References
721///  - [`prctl(PR_CAP_AMBIENT,PR_CAP_AMBIENT_CLEAR_ALL,…)`]
722///
723/// [`prctl(PR_CAP_AMBIENT,PR_CAP_AMBIENT_CLEAR_ALL,…)`]: https://man7.org/linux/man-pages/man2/prctl.2.html
724#[inline]
725pub fn clear_ambient_capability_set() -> io::Result<()> {
726    unsafe { prctl_2args(PR_CAP_AMBIENT, PR_CAP_AMBIENT_CLEAR_ALL as *mut _) }.map(|_r| ())
727}
728
729const PR_CAP_AMBIENT_RAISE: usize = 2;
730const PR_CAP_AMBIENT_LOWER: usize = 3;
731
732/// Add or remove the specified capability to the ambient set.
733///
734/// # References
735///  - [`prctl(PR_CAP_AMBIENT,…)`]
736///
737/// [`prctl(PR_CAP_AMBIENT,…)`]: https://man7.org/linux/man-pages/man2/prctl.2.html
738#[inline]
739pub fn configure_capability_in_ambient_set(
740    capability: impl CompatCapability,
741    enable: bool,
742) -> io::Result<()> {
743    let sub_operation = if enable {
744        PR_CAP_AMBIENT_RAISE
745    } else {
746        PR_CAP_AMBIENT_LOWER
747    };
748    let capset = capability.as_capability_set(private::Token).bits();
749    if capset.count_ones() != 1 {
750        return Err(Errno::INVAL);
751    }
752    let cap = capset.trailing_zeros();
753
754    unsafe {
755        prctl_3args(
756            PR_CAP_AMBIENT,
757            sub_operation as *mut _,
758            // as *mut _ should be ptr::without_provenance_mut but our MSRV does not allow it.
759            cap as usize as *mut _,
760        )
761    }
762    .map(|_r| ())
763}
764
765//
766// PR_SVE_GET_VL/PR_SVE_SET_VL
767//
768
const PR_SVE_GET_VL: c_int = 51;

// Layout of the SVE vector-length word: the length lives in the low 16 bits,
// the "inherit" flag in bit 17.
const PR_SVE_VL_LEN_MASK: u32 = 0x0000_ffff;
const PR_SVE_VL_INHERIT: u32 = 1 << 17;

/// Vector length settings of the Scalable Vector Extension.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub struct SVEVectorLengthConfig {
    /// Length of a vector, measured in bytes.
    pub vector_length_in_bytes: u32,
    /// Whether the vector length is inherited across `execve`.
    pub vector_length_inherited_across_execve: bool,
}
782
783/// Get the thread's current SVE vector length configuration.
784///
785/// # References
786///  - [`prctl(PR_SVE_GET_VL,…)`]
787///
788/// [`prctl(PR_SVE_GET_VL,…)`]: https://man7.org/linux/man-pages/man2/prctl.2.html
789#[inline]
790pub fn sve_vector_length_configuration() -> io::Result<SVEVectorLengthConfig> {
791    let bits = unsafe { prctl_1arg(PR_SVE_GET_VL)? } as c_uint;
792    Ok(SVEVectorLengthConfig {
793        vector_length_in_bytes: bits & PR_SVE_VL_LEN_MASK,
794        vector_length_inherited_across_execve: (bits & PR_SVE_VL_INHERIT) != 0,
795    })
796}
797
798const PR_SVE_SET_VL: c_int = 50;
799
800const PR_SVE_SET_VL_ONEXEC: u32 = 1_u32 << 18;
801
802/// Configure the thread's vector length of Scalable Vector Extension.
803///
804/// # References
805///  - [`prctl(PR_SVE_SET_VL,…)`]
806///
807/// # Safety
808///
809/// Please ensure the conditions necessary to safely call this function,
810/// as detailed in the references above.
811///
812/// [`prctl(PR_SVE_SET_VL,…)`]: https://man7.org/linux/man-pages/man2/prctl.2.html
813#[inline]
814pub unsafe fn set_sve_vector_length_configuration(
815    vector_length_in_bytes: usize,
816    vector_length_inherited_across_execve: bool,
817    defer_change_to_next_execve: bool,
818) -> io::Result<()> {
819    let vector_length_in_bytes =
820        u32::try_from(vector_length_in_bytes).map_err(|_r| io::Errno::RANGE)?;
821
822    let mut bits = vector_length_in_bytes & PR_SVE_VL_LEN_MASK;
823
824    if vector_length_inherited_across_execve {
825        bits |= PR_SVE_VL_INHERIT;
826    }
827
828    if defer_change_to_next_execve {
829        bits |= PR_SVE_SET_VL_ONEXEC;
830    }
831
832    prctl_2args(PR_SVE_SET_VL, bits as usize as *mut _).map(|_r| ())
833}
834
835//
836// PR_PAC_RESET_KEYS
837//
838
839const PR_PAC_RESET_KEYS: c_int = 54;
840
841/// Securely reset the thread's pointer authentication keys to fresh random
842/// values generated by the kernel.
843///
844/// # References
845///  - [`prctl(PR_PAC_RESET_KEYS,…)`]
846///
847/// # Safety
848///
849/// Please ensure the conditions necessary to safely call this function,
850/// as detailed in the references above.
851///
852/// [`prctl(PR_PAC_RESET_KEYS,…)`]: https://man7.org/linux/man-pages/man2/prctl.2.html
853#[inline]
854#[cfg(linux_raw_dep)]
855pub unsafe fn reset_pointer_authentication_keys(
856    keys: Option<PointerAuthenticationKeys>,
857) -> io::Result<()> {
858    let keys = keys.as_ref().map_or(0_u32, PointerAuthenticationKeys::bits);
859    prctl_2args(PR_PAC_RESET_KEYS, keys as usize as *mut _).map(|_r| ())
860}
861
862//
863// PR_GET_TAGGED_ADDR_CTRL/PR_SET_TAGGED_ADDR_CTRL
864//
865
866const PR_GET_TAGGED_ADDR_CTRL: c_int = 56;
867
868const PR_MTE_TAG_SHIFT: u32 = 3;
869const PR_MTE_TAG_MASK: u32 = 0xffff_u32 << PR_MTE_TAG_SHIFT;
870
871bitflags! {
872    /// Zero means addresses that are passed for the purpose of being
873    /// dereferenced by the kernel must be untagged.
874    #[repr(transparent)]
875    #[derive(Copy, Clone, Eq, PartialEq, Hash, Debug)]
876    pub struct TaggedAddressMode: u32 {
877        /// Addresses that are passed for the purpose of being dereferenced by
878        /// the kernel may be tagged.
879        const ENABLED = 1_u32 << 0;
880        /// Synchronous tag check fault mode.
881        const TCF_SYNC = 1_u32 << 1;
882        /// Asynchronous tag check fault mode.
883        const TCF_ASYNC = 1_u32 << 2;
884
885        /// <https://docs.rs/bitflags/*/bitflags/#externally-defined-flags>
886        const _ = !0;
887    }
888}
889
890/// Get the current tagged address mode for the calling thread.
891///
892/// # References
893///  - [`prctl(PR_GET_TAGGED_ADDR_CTRL,…)`]
894///
895/// [`prctl(PR_GET_TAGGED_ADDR_CTRL,…)`]: https://man7.org/linux/man-pages/man2/prctl.2.html
896#[inline]
897pub fn current_tagged_address_mode() -> io::Result<(Option<TaggedAddressMode>, u32)> {
898    let r = unsafe { prctl_1arg(PR_GET_TAGGED_ADDR_CTRL)? } as c_uint;
899    let mode = r & 0b111_u32;
900    let mte_tag = (r & PR_MTE_TAG_MASK) >> PR_MTE_TAG_SHIFT;
901    Ok((TaggedAddressMode::from_bits(mode), mte_tag))
902}
903
904const PR_SET_TAGGED_ADDR_CTRL: c_int = 55;
905
906/// Controls support for passing tagged user-space addresses to the kernel.
907///
908/// # References
909///  - [`prctl(PR_SET_TAGGED_ADDR_CTRL,…)`]
910///
911/// # Safety
912///
913/// Please ensure the conditions necessary to safely call this function, as
914/// detailed in the references above.
915///
916/// [`prctl(PR_SET_TAGGED_ADDR_CTRL,…)`]: https://man7.org/linux/man-pages/man2/prctl.2.html
917#[inline]
918pub unsafe fn set_current_tagged_address_mode(
919    mode: Option<TaggedAddressMode>,
920    mte_tag: u32,
921) -> io::Result<()> {
922    let config = mode.as_ref().map_or(0_u32, TaggedAddressMode::bits)
923        | ((mte_tag << PR_MTE_TAG_SHIFT) & PR_MTE_TAG_MASK);
924    prctl_2args(PR_SET_TAGGED_ADDR_CTRL, config as usize as *mut _).map(|_r| ())
925}
926
927//
928// PR_SET_SYSCALL_USER_DISPATCH
929//
930
931const PR_SET_SYSCALL_USER_DISPATCH: c_int = 59;
932
933const PR_SYS_DISPATCH_OFF: usize = 0;
934
935/// Disable Syscall User Dispatch mechanism.
936///
937/// # References
938///  - [`prctl(PR_SET_SYSCALL_USER_DISPATCH,PR_SYS_DISPATCH_OFF,…)`]
939///
940/// # Safety
941///
942/// Please ensure the conditions necessary to safely call this function, as
943/// detailed in the references above.
944///
945/// [`prctl(PR_SET_SYSCALL_USER_DISPATCH,PR_SYS_DISPATCH_OFF,…)`]: https://man7.org/linux/man-pages/man2/prctl.2.html
946#[inline]
947pub unsafe fn disable_syscall_user_dispatch() -> io::Result<()> {
948    prctl_2args(PR_SET_SYSCALL_USER_DISPATCH, PR_SYS_DISPATCH_OFF as *mut _).map(|_r| ())
949}
950
const PR_SYS_DISPATCH_ON: usize = 1;

/// Selector value that lets system calls execute.
const SYSCALL_DISPATCH_FILTER_ALLOW: u8 = 0;
/// Selector value that blocks system calls from executing.
const SYSCALL_DISPATCH_FILTER_BLOCK: u8 = 1;

/// State of the fast switch flag, which steers the system call user dispatch
/// mechanism without the need to issue a syscall.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
#[repr(u8)]
pub enum SysCallUserDispatchFastSwitch {
    /// System calls are allowed to execute.
    Allow = SYSCALL_DISPATCH_FILTER_ALLOW,
    /// System calls are blocked from executing.
    Block = SYSCALL_DISPATCH_FILTER_BLOCK,
}
968
969impl TryFrom<u8> for SysCallUserDispatchFastSwitch {
970    type Error = io::Errno;
971
972    fn try_from(value: u8) -> Result<Self, Self::Error> {
973        match value {
974            SYSCALL_DISPATCH_FILTER_ALLOW => Ok(Self::Allow),
975            SYSCALL_DISPATCH_FILTER_BLOCK => Ok(Self::Block),
976            _ => Err(io::Errno::RANGE),
977        }
978    }
979}
980
981/// Enable Syscall User Dispatch mechanism.
982///
983/// # References
984///  - [`prctl(PR_SET_SYSCALL_USER_DISPATCH,PR_SYS_DISPATCH_ON,…)`]
985///
986/// # Safety
987///
988/// Please ensure the conditions necessary to safely call this function, as
989/// detailed in the references above.
990///
991/// [`prctl(PR_SET_SYSCALL_USER_DISPATCH,PR_SYS_DISPATCH_ON,…)`]: https://man7.org/linux/man-pages/man2/prctl.2.html
992#[inline]
993pub unsafe fn enable_syscall_user_dispatch(
994    always_allowed_region: &[u8],
995    fast_switch_flag: &AtomicU8,
996) -> io::Result<()> {
997    syscalls::prctl(
998        PR_SET_SYSCALL_USER_DISPATCH,
999        PR_SYS_DISPATCH_ON as *mut _,
1000        always_allowed_region.as_ptr() as *mut _,
1001        always_allowed_region.len() as *mut _,
1002        as_ptr(fast_switch_flag) as *mut _,
1003    )
1004    .map(|_r| ())
1005}
1006
1007//
1008// PR_SCHED_CORE
1009//
1010
const PR_SCHED_CORE: c_int = 62;

const PR_SCHED_CORE_GET: usize = 0;

const PR_SCHED_CORE_SCOPE_THREAD: u32 = 0;
const PR_SCHED_CORE_SCOPE_THREAD_GROUP: u32 = 1;
const PR_SCHED_CORE_SCOPE_PROCESS_GROUP: u32 = 2;

/// `PR_SCHED_CORE_SCOPE_*`
///
/// Selects which set of tasks a core scheduling operation applies to.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
#[repr(u32)]
pub enum CoreSchedulingScope {
    /// Operation will be performed for the thread.
    Thread = PR_SCHED_CORE_SCOPE_THREAD,
    /// Operation will be performed for all tasks in the task group of the
    /// process.
    ThreadGroup = PR_SCHED_CORE_SCOPE_THREAD_GROUP,
    /// Operation will be performed for all processes in the process group.
    ProcessGroup = PR_SCHED_CORE_SCOPE_PROCESS_GROUP,
}
1031
1032impl TryFrom<u32> for CoreSchedulingScope {
1033    type Error = io::Errno;
1034
1035    fn try_from(value: u32) -> Result<Self, Self::Error> {
1036        match value {
1037            PR_SCHED_CORE_SCOPE_THREAD => Ok(Self::Thread),
1038            PR_SCHED_CORE_SCOPE_THREAD_GROUP => Ok(Self::ThreadGroup),
1039            PR_SCHED_CORE_SCOPE_PROCESS_GROUP => Ok(Self::ProcessGroup),
1040            _ => Err(io::Errno::RANGE),
1041        }
1042    }
1043}
1044
1045/// Get core scheduling cookie of a process.
1046///
1047/// # References
1048///  - [`prctl(PR_SCHED_CORE,PR_SCHED_CORE_GET,…)`]
1049///
1050/// [`prctl(PR_SCHED_CORE,PR_SCHED_CORE_GET,…)`]: https://www.kernel.org/doc/html/v6.13/admin-guide/hw-vuln/core-scheduling.html
1051#[inline]
1052pub fn core_scheduling_cookie(pid: Pid, scope: CoreSchedulingScope) -> io::Result<u64> {
1053    let mut value: MaybeUninit<u64> = MaybeUninit::uninit();
1054    unsafe {
1055        syscalls::prctl(
1056            PR_SCHED_CORE,
1057            PR_SCHED_CORE_GET as *mut _,
1058            pid.as_raw_nonzero().get() as usize as *mut _,
1059            scope as usize as *mut _,
1060            value.as_mut_ptr().cast(),
1061        )?;
1062        Ok(value.assume_init())
1063    }
1064}
1065
1066const PR_SCHED_CORE_CREATE: usize = 1;
1067
1068/// Create unique core scheduling cookie.
1069///
1070/// # References
1071///  - [`prctl(PR_SCHED_CORE,PR_SCHED_CORE_CREATE,…)`]
1072///
1073/// [`prctl(PR_SCHED_CORE,PR_SCHED_CORE_CREATE,…)`]: https://www.kernel.org/doc/html/v6.13/admin-guide/hw-vuln/core-scheduling.html
1074#[inline]
1075pub fn create_core_scheduling_cookie(pid: Pid, scope: CoreSchedulingScope) -> io::Result<()> {
1076    unsafe {
1077        syscalls::prctl(
1078            PR_SCHED_CORE,
1079            PR_SCHED_CORE_CREATE as *mut _,
1080            pid.as_raw_nonzero().get() as usize as *mut _,
1081            scope as usize as *mut _,
1082            ptr::null_mut(),
1083        )
1084        .map(|_r| ())
1085    }
1086}
1087
1088const PR_SCHED_CORE_SHARE_TO: usize = 2;
1089
1090/// Push core scheduling cookie to a process.
1091///
1092/// # References
1093///  - [`prctl(PR_SCHED_CORE,PR_SCHED_CORE_SHARE_TO,…)`]
1094///
1095/// [`prctl(PR_SCHED_CORE,PR_SCHED_CORE_SHARE_TO,…)`]: https://www.kernel.org/doc/html/v6.13/admin-guide/hw-vuln/core-scheduling.html
1096#[inline]
1097pub fn push_core_scheduling_cookie(pid: Pid, scope: CoreSchedulingScope) -> io::Result<()> {
1098    unsafe {
1099        syscalls::prctl(
1100            PR_SCHED_CORE,
1101            PR_SCHED_CORE_SHARE_TO as *mut _,
1102            pid.as_raw_nonzero().get() as usize as *mut _,
1103            scope as usize as *mut _,
1104            ptr::null_mut(),
1105        )
1106        .map(|_r| ())
1107    }
1108}
1109
1110const PR_SCHED_CORE_SHARE_FROM: usize = 3;
1111
1112/// Pull core scheduling cookie from a process.
1113///
1114/// # References
1115///  - [`prctl(PR_SCHED_CORE,PR_SCHED_CORE_SHARE_FROM,…)`]
1116///
1117/// [`prctl(PR_SCHED_CORE,PR_SCHED_CORE_SHARE_FROM,…)`]: https://www.kernel.org/doc/html/v6.13/admin-guide/hw-vuln/core-scheduling.html
1118#[inline]
1119pub fn pull_core_scheduling_cookie(pid: Pid, scope: CoreSchedulingScope) -> io::Result<()> {
1120    unsafe {
1121        syscalls::prctl(
1122            PR_SCHED_CORE,
1123            PR_SCHED_CORE_SHARE_FROM as *mut _,
1124            pid.as_raw_nonzero().get() as usize as *mut _,
1125            scope as usize as *mut _,
1126            ptr::null_mut(),
1127        )
1128        .map(|_r| ())
1129    }
1130}