1 // SPDX-License-Identifier: GPL-2.0 2 3 //! String representations. 4 5 use crate::alloc::{flags::*, AllocError, KVec}; 6 use core::fmt::{self, Write}; 7 use core::ops::{self, Deref, DerefMut, Index}; 8 9 use crate::error::{code::*, Error}; 10 11 /// Byte string without UTF-8 validity guarantee. 12 #[repr(transparent)] 13 pub struct BStr([u8]); 14 15 impl BStr { 16 /// Returns the length of this string. 17 #[inline] len(&self) -> usize18 pub const fn len(&self) -> usize { 19 self.0.len() 20 } 21 22 /// Returns `true` if the string is empty. 23 #[inline] is_empty(&self) -> bool24 pub const fn is_empty(&self) -> bool { 25 self.len() == 0 26 } 27 28 /// Creates a [`BStr`] from a `[u8]`. 29 #[inline] from_bytes(bytes: &[u8]) -> &Self30 pub const fn from_bytes(bytes: &[u8]) -> &Self { 31 // SAFETY: `BStr` is transparent to `[u8]`. 32 unsafe { &*(bytes as *const [u8] as *const BStr) } 33 } 34 35 /// Strip a prefix from `self`. Delegates to [`slice::strip_prefix`]. 36 /// 37 /// # Examples 38 /// 39 /// ``` 40 /// # use kernel::b_str; 41 /// assert_eq!(Some(b_str!("bar")), b_str!("foobar").strip_prefix(b_str!("foo"))); 42 /// assert_eq!(None, b_str!("foobar").strip_prefix(b_str!("bar"))); 43 /// assert_eq!(Some(b_str!("foobar")), b_str!("foobar").strip_prefix(b_str!(""))); 44 /// assert_eq!(Some(b_str!("")), b_str!("foobar").strip_prefix(b_str!("foobar"))); 45 /// ``` strip_prefix(&self, pattern: impl AsRef<Self>) -> Option<&BStr>46 pub fn strip_prefix(&self, pattern: impl AsRef<Self>) -> Option<&BStr> { 47 self.deref() 48 .strip_prefix(pattern.as_ref().deref()) 49 .map(Self::from_bytes) 50 } 51 } 52 53 impl fmt::Display for BStr { 54 /// Formats printable ASCII characters, escaping the rest. 55 /// 56 /// ``` 57 /// # use kernel::{fmt, b_str, str::{BStr, CString}}; 58 /// let ascii = b_str!("Hello, BStr!"); 59 /// let s = CString::try_from_fmt(fmt!("{}", ascii))?; 60 /// assert_eq!(s.as_bytes(), "Hello, BStr!".as_bytes()); 61 /// 62 /// let non_ascii = b_str!(""); 63 /// let s = CString::try_from_fmt(fmt!("{}", non_ascii))?; 64 /// assert_eq!(s.as_bytes(), "\\xf0\\x9f\\xa6\\x80".as_bytes()); 65 /// # Ok::<(), kernel::error::Error>(()) 66 /// ``` fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result67 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { 68 for &b in &self.0 { 69 match b { 70 // Common escape codes. 71 b'\t' => f.write_str("\\t")?, 72 b'\n' => f.write_str("\\n")?, 73 b'\r' => f.write_str("\\r")?, 74 // Printable characters. 75 0x20..=0x7e => f.write_char(b as char)?, 76 _ => write!(f, "\\x{b:02x}")?, 77 } 78 } 79 Ok(()) 80 } 81 } 82 83 impl fmt::Debug for BStr { 84 /// Formats printable ASCII characters with a double quote on either end, 85 /// escaping the rest. 86 /// 87 /// ``` 88 /// # use kernel::{fmt, b_str, str::{BStr, CString}}; 89 /// // Embedded double quotes are escaped. 90 /// let ascii = b_str!("Hello, \"BStr\"!"); 91 /// let s = CString::try_from_fmt(fmt!("{:?}", ascii))?; 92 /// assert_eq!(s.as_bytes(), "\"Hello, \\\"BStr\\\"!\"".as_bytes()); 93 /// 94 /// let non_ascii = b_str!(""); 95 /// let s = CString::try_from_fmt(fmt!("{:?}", non_ascii))?; 96 /// assert_eq!(s.as_bytes(), "\"\\xf0\\x9f\\x98\\xba\"".as_bytes()); 97 /// # Ok::<(), kernel::error::Error>(()) 98 /// ``` fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result99 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { 100 f.write_char('"')?; 101 for &b in &self.0 { 102 match b { 103 // Common escape codes. 104 b'\t' => f.write_str("\\t")?, 105 b'\n' => f.write_str("\\n")?, 106 b'\r' => f.write_str("\\r")?, 107 // String escape characters. 108 b'\"' => f.write_str("\\\"")?, 109 b'\\' => f.write_str("\\\\")?, 110 // Printable characters. 111 0x20..=0x7e => f.write_char(b as char)?, 112 _ => write!(f, "\\x{b:02x}")?, 113 } 114 } 115 f.write_char('"') 116 } 117 } 118 119 impl Deref for BStr { 120 type Target = [u8]; 121 122 #[inline] deref(&self) -> &Self::Target123 fn deref(&self) -> &Self::Target { 124 &self.0 125 } 126 } 127 128 impl PartialEq for BStr { eq(&self, other: &Self) -> bool129 fn eq(&self, other: &Self) -> bool { 130 self.deref().eq(other.deref()) 131 } 132 } 133 134 impl<Idx> Index<Idx> for BStr 135 where 136 [u8]: Index<Idx, Output = [u8]>, 137 { 138 type Output = Self; 139 index(&self, index: Idx) -> &Self::Output140 fn index(&self, index: Idx) -> &Self::Output { 141 BStr::from_bytes(&self.0[index]) 142 } 143 } 144 145 impl AsRef<BStr> for [u8] { as_ref(&self) -> &BStr146 fn as_ref(&self) -> &BStr { 147 BStr::from_bytes(self) 148 } 149 } 150 151 impl AsRef<BStr> for BStr { as_ref(&self) -> &BStr152 fn as_ref(&self) -> &BStr { 153 self 154 } 155 } 156 157 /// Creates a new [`BStr`] from a string literal. 158 /// 159 /// `b_str!` converts the supplied string literal to byte string, so non-ASCII 160 /// characters can be included. 161 /// 162 /// # Examples 163 /// 164 /// ``` 165 /// # use kernel::b_str; 166 /// # use kernel::str::BStr; 167 /// const MY_BSTR: &BStr = b_str!("My awesome BStr!"); 168 /// ``` 169 #[macro_export] 170 macro_rules! b_str { 171 ($str:literal) => {{ 172 const S: &'static str = $str; 173 const C: &'static $crate::str::BStr = $crate::str::BStr::from_bytes(S.as_bytes()); 174 C 175 }}; 176 } 177 178 /// Possible errors when using conversion functions in [`CStr`]. 179 #[derive(Debug, Clone, Copy)] 180 pub enum CStrConvertError { 181 /// Supplied bytes contain an interior `NUL`. 182 InteriorNul, 183 184 /// Supplied bytes are not terminated by `NUL`. 185 NotNulTerminated, 186 } 187 188 impl From<CStrConvertError> for Error { 189 #[inline] from(_: CStrConvertError) -> Error190 fn from(_: CStrConvertError) -> Error { 191 EINVAL 192 } 193 } 194 195 /// A string that is guaranteed to have exactly one `NUL` byte, which is at the 196 /// end. 197 /// 198 /// Used for interoperability with kernel APIs that take C strings. 199 #[repr(transparent)] 200 pub struct CStr([u8]); 201 202 impl CStr { 203 /// Returns the length of this string excluding `NUL`. 204 #[inline] len(&self) -> usize205 pub const fn len(&self) -> usize { 206 self.len_with_nul() - 1 207 } 208 209 /// Returns the length of this string with `NUL`. 210 #[inline] len_with_nul(&self) -> usize211 pub const fn len_with_nul(&self) -> usize { 212 if self.0.is_empty() { 213 // SAFETY: This is one of the invariant of `CStr`. 214 // We add a `unreachable_unchecked` here to hint the optimizer that 215 // the value returned from this function is non-zero. 216 unsafe { core::hint::unreachable_unchecked() }; 217 } 218 self.0.len() 219 } 220 221 /// Returns `true` if the string only includes `NUL`. 222 #[inline] is_empty(&self) -> bool223 pub const fn is_empty(&self) -> bool { 224 self.len() == 0 225 } 226 227 /// Wraps a raw C string pointer. 228 /// 229 /// # Safety 230 /// 231 /// `ptr` must be a valid pointer to a `NUL`-terminated C string, and it must 232 /// last at least `'a`. When `CStr` is alive, the memory pointed by `ptr` 233 /// must not be mutated. 234 #[inline] from_char_ptr<'a>(ptr: *const crate::ffi::c_char) -> &'a Self235 pub unsafe fn from_char_ptr<'a>(ptr: *const crate::ffi::c_char) -> &'a Self { 236 // SAFETY: The safety precondition guarantees `ptr` is a valid pointer 237 // to a `NUL`-terminated C string. 238 let len = unsafe { bindings::strlen(ptr) } + 1; 239 // SAFETY: Lifetime guaranteed by the safety precondition. 240 let bytes = unsafe { core::slice::from_raw_parts(ptr as _, len) }; 241 // SAFETY: As `len` is returned by `strlen`, `bytes` does not contain interior `NUL`. 242 // As we have added 1 to `len`, the last byte is known to be `NUL`. 243 unsafe { Self::from_bytes_with_nul_unchecked(bytes) } 244 } 245 246 /// Creates a [`CStr`] from a `[u8]`. 247 /// 248 /// The provided slice must be `NUL`-terminated, does not contain any 249 /// interior `NUL` bytes. from_bytes_with_nul(bytes: &[u8]) -> Result<&Self, CStrConvertError>250 pub const fn from_bytes_with_nul(bytes: &[u8]) -> Result<&Self, CStrConvertError> { 251 if bytes.is_empty() { 252 return Err(CStrConvertError::NotNulTerminated); 253 } 254 if bytes[bytes.len() - 1] != 0 { 255 return Err(CStrConvertError::NotNulTerminated); 256 } 257 let mut i = 0; 258 // `i + 1 < bytes.len()` allows LLVM to optimize away bounds checking, 259 // while it couldn't optimize away bounds checks for `i < bytes.len() - 1`. 260 while i + 1 < bytes.len() { 261 if bytes[i] == 0 { 262 return Err(CStrConvertError::InteriorNul); 263 } 264 i += 1; 265 } 266 // SAFETY: We just checked that all properties hold. 267 Ok(unsafe { Self::from_bytes_with_nul_unchecked(bytes) }) 268 } 269 270 /// Creates a [`CStr`] from a `[u8]` without performing any additional 271 /// checks. 272 /// 273 /// # Safety 274 /// 275 /// `bytes` *must* end with a `NUL` byte, and should only have a single 276 /// `NUL` byte (or the string will be truncated). 277 #[inline] from_bytes_with_nul_unchecked(bytes: &[u8]) -> &CStr278 pub const unsafe fn from_bytes_with_nul_unchecked(bytes: &[u8]) -> &CStr { 279 // SAFETY: Properties of `bytes` guaranteed by the safety precondition. 280 unsafe { core::mem::transmute(bytes) } 281 } 282 283 /// Creates a mutable [`CStr`] from a `[u8]` without performing any 284 /// additional checks. 285 /// 286 /// # Safety 287 /// 288 /// `bytes` *must* end with a `NUL` byte, and should only have a single 289 /// `NUL` byte (or the string will be truncated). 290 #[inline] from_bytes_with_nul_unchecked_mut(bytes: &mut [u8]) -> &mut CStr291 pub unsafe fn from_bytes_with_nul_unchecked_mut(bytes: &mut [u8]) -> &mut CStr { 292 // SAFETY: Properties of `bytes` guaranteed by the safety precondition. 293 unsafe { &mut *(bytes as *mut [u8] as *mut CStr) } 294 } 295 296 /// Returns a C pointer to the string. 297 #[inline] as_char_ptr(&self) -> *const crate::ffi::c_char298 pub const fn as_char_ptr(&self) -> *const crate::ffi::c_char { 299 self.0.as_ptr() 300 } 301 302 /// Convert the string to a byte slice without the trailing `NUL` byte. 303 #[inline] as_bytes(&self) -> &[u8]304 pub fn as_bytes(&self) -> &[u8] { 305 &self.0[..self.len()] 306 } 307 308 /// Convert the string to a byte slice containing the trailing `NUL` byte. 309 #[inline] as_bytes_with_nul(&self) -> &[u8]310 pub const fn as_bytes_with_nul(&self) -> &[u8] { 311 &self.0 312 } 313 314 /// Yields a [`&str`] slice if the [`CStr`] contains valid UTF-8. 315 /// 316 /// If the contents of the [`CStr`] are valid UTF-8 data, this 317 /// function will return the corresponding [`&str`] slice. Otherwise, 318 /// it will return an error with details of where UTF-8 validation failed. 319 /// 320 /// # Examples 321 /// 322 /// ``` 323 /// # use kernel::str::CStr; 324 /// let cstr = CStr::from_bytes_with_nul(b"foo\0")?; 325 /// assert_eq!(cstr.to_str(), Ok("foo")); 326 /// # Ok::<(), kernel::error::Error>(()) 327 /// ``` 328 #[inline] to_str(&self) -> Result<&str, core::str::Utf8Error>329 pub fn to_str(&self) -> Result<&str, core::str::Utf8Error> { 330 core::str::from_utf8(self.as_bytes()) 331 } 332 333 /// Unsafely convert this [`CStr`] into a [`&str`], without checking for 334 /// valid UTF-8. 335 /// 336 /// # Safety 337 /// 338 /// The contents must be valid UTF-8. 339 /// 340 /// # Examples 341 /// 342 /// ``` 343 /// # use kernel::c_str; 344 /// # use kernel::str::CStr; 345 /// let bar = c_str!("ツ"); 346 /// // SAFETY: String literals are guaranteed to be valid UTF-8 347 /// // by the Rust compiler. 348 /// assert_eq!(unsafe { bar.as_str_unchecked() }, "ツ"); 349 /// ``` 350 #[inline] as_str_unchecked(&self) -> &str351 pub unsafe fn as_str_unchecked(&self) -> &str { 352 // SAFETY: TODO. 353 unsafe { core::str::from_utf8_unchecked(self.as_bytes()) } 354 } 355 356 /// Convert this [`CStr`] into a [`CString`] by allocating memory and 357 /// copying over the string data. to_cstring(&self) -> Result<CString, AllocError>358 pub fn to_cstring(&self) -> Result<CString, AllocError> { 359 CString::try_from(self) 360 } 361 362 /// Converts this [`CStr`] to its ASCII lower case equivalent in-place. 363 /// 364 /// ASCII letters 'A' to 'Z' are mapped to 'a' to 'z', 365 /// but non-ASCII letters are unchanged. 366 /// 367 /// To return a new lowercased value without modifying the existing one, use 368 /// [`to_ascii_lowercase()`]. 369 /// 370 /// [`to_ascii_lowercase()`]: #method.to_ascii_lowercase make_ascii_lowercase(&mut self)371 pub fn make_ascii_lowercase(&mut self) { 372 // INVARIANT: This doesn't introduce or remove NUL bytes in the C 373 // string. 374 self.0.make_ascii_lowercase(); 375 } 376 377 /// Converts this [`CStr`] to its ASCII upper case equivalent in-place. 378 /// 379 /// ASCII letters 'a' to 'z' are mapped to 'A' to 'Z', 380 /// but non-ASCII letters are unchanged. 381 /// 382 /// To return a new uppercased value without modifying the existing one, use 383 /// [`to_ascii_uppercase()`]. 384 /// 385 /// [`to_ascii_uppercase()`]: #method.to_ascii_uppercase make_ascii_uppercase(&mut self)386 pub fn make_ascii_uppercase(&mut self) { 387 // INVARIANT: This doesn't introduce or remove NUL bytes in the C 388 // string. 389 self.0.make_ascii_uppercase(); 390 } 391 392 /// Returns a copy of this [`CString`] where each character is mapped to its 393 /// ASCII lower case equivalent. 394 /// 395 /// ASCII letters 'A' to 'Z' are mapped to 'a' to 'z', 396 /// but non-ASCII letters are unchanged. 397 /// 398 /// To lowercase the value in-place, use [`make_ascii_lowercase`]. 399 /// 400 /// [`make_ascii_lowercase`]: str::make_ascii_lowercase to_ascii_lowercase(&self) -> Result<CString, AllocError>401 pub fn to_ascii_lowercase(&self) -> Result<CString, AllocError> { 402 let mut s = self.to_cstring()?; 403 404 s.make_ascii_lowercase(); 405 406 Ok(s) 407 } 408 409 /// Returns a copy of this [`CString`] where each character is mapped to its 410 /// ASCII upper case equivalent. 411 /// 412 /// ASCII letters 'a' to 'z' are mapped to 'A' to 'Z', 413 /// but non-ASCII letters are unchanged. 414 /// 415 /// To uppercase the value in-place, use [`make_ascii_uppercase`]. 416 /// 417 /// [`make_ascii_uppercase`]: str::make_ascii_uppercase to_ascii_uppercase(&self) -> Result<CString, AllocError>418 pub fn to_ascii_uppercase(&self) -> Result<CString, AllocError> { 419 let mut s = self.to_cstring()?; 420 421 s.make_ascii_uppercase(); 422 423 Ok(s) 424 } 425 } 426 427 impl fmt::Display for CStr { 428 /// Formats printable ASCII characters, escaping the rest. 429 /// 430 /// ``` 431 /// # use kernel::c_str; 432 /// # use kernel::fmt; 433 /// # use kernel::str::CStr; 434 /// # use kernel::str::CString; 435 /// let penguin = c_str!(""); 436 /// let s = CString::try_from_fmt(fmt!("{}", penguin))?; 437 /// assert_eq!(s.as_bytes_with_nul(), "\\xf0\\x9f\\x90\\xa7\0".as_bytes()); 438 /// 439 /// let ascii = c_str!("so \"cool\""); 440 /// let s = CString::try_from_fmt(fmt!("{}", ascii))?; 441 /// assert_eq!(s.as_bytes_with_nul(), "so \"cool\"\0".as_bytes()); 442 /// # Ok::<(), kernel::error::Error>(()) 443 /// ``` fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result444 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { 445 for &c in self.as_bytes() { 446 if (0x20..0x7f).contains(&c) { 447 // Printable character. 448 f.write_char(c as char)?; 449 } else { 450 write!(f, "\\x{c:02x}")?; 451 } 452 } 453 Ok(()) 454 } 455 } 456 457 impl fmt::Debug for CStr { 458 /// Formats printable ASCII characters with a double quote on either end, escaping the rest. 459 /// 460 /// ``` 461 /// # use kernel::c_str; 462 /// # use kernel::fmt; 463 /// # use kernel::str::CStr; 464 /// # use kernel::str::CString; 465 /// let penguin = c_str!(""); 466 /// let s = CString::try_from_fmt(fmt!("{:?}", penguin))?; 467 /// assert_eq!(s.as_bytes_with_nul(), "\"\\xf0\\x9f\\x90\\xa7\"\0".as_bytes()); 468 /// 469 /// // Embedded double quotes are escaped. 470 /// let ascii = c_str!("so \"cool\""); 471 /// let s = CString::try_from_fmt(fmt!("{:?}", ascii))?; 472 /// assert_eq!(s.as_bytes_with_nul(), "\"so \\\"cool\\\"\"\0".as_bytes()); 473 /// # Ok::<(), kernel::error::Error>(()) 474 /// ``` fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result475 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { 476 f.write_str("\"")?; 477 for &c in self.as_bytes() { 478 match c { 479 // Printable characters. 480 b'\"' => f.write_str("\\\"")?, 481 0x20..=0x7e => f.write_char(c as char)?, 482 _ => write!(f, "\\x{c:02x}")?, 483 } 484 } 485 f.write_str("\"") 486 } 487 } 488 489 impl AsRef<BStr> for CStr { 490 #[inline] as_ref(&self) -> &BStr491 fn as_ref(&self) -> &BStr { 492 BStr::from_bytes(self.as_bytes()) 493 } 494 } 495 496 impl Deref for CStr { 497 type Target = BStr; 498 499 #[inline] deref(&self) -> &Self::Target500 fn deref(&self) -> &Self::Target { 501 self.as_ref() 502 } 503 } 504 505 impl Index<ops::RangeFrom<usize>> for CStr { 506 type Output = CStr; 507 508 #[inline] index(&self, index: ops::RangeFrom<usize>) -> &Self::Output509 fn index(&self, index: ops::RangeFrom<usize>) -> &Self::Output { 510 // Delegate bounds checking to slice. 511 // Assign to _ to mute clippy's unnecessary operation warning. 512 let _ = &self.as_bytes()[index.start..]; 513 // SAFETY: We just checked the bounds. 514 unsafe { Self::from_bytes_with_nul_unchecked(&self.0[index.start..]) } 515 } 516 } 517 518 impl Index<ops::RangeFull> for CStr { 519 type Output = CStr; 520 521 #[inline] index(&self, _index: ops::RangeFull) -> &Self::Output522 fn index(&self, _index: ops::RangeFull) -> &Self::Output { 523 self 524 } 525 } 526 527 mod private { 528 use core::ops; 529 530 // Marker trait for index types that can be forward to `BStr`. 531 pub trait CStrIndex {} 532 533 impl CStrIndex for usize {} 534 impl CStrIndex for ops::Range<usize> {} 535 impl CStrIndex for ops::RangeInclusive<usize> {} 536 impl CStrIndex for ops::RangeToInclusive<usize> {} 537 } 538 539 impl<Idx> Index<Idx> for CStr 540 where 541 Idx: private::CStrIndex, 542 BStr: Index<Idx>, 543 { 544 type Output = <BStr as Index<Idx>>::Output; 545 546 #[inline] index(&self, index: Idx) -> &Self::Output547 fn index(&self, index: Idx) -> &Self::Output { 548 &self.as_ref()[index] 549 } 550 } 551 552 /// Creates a new [`CStr`] from a string literal. 553 /// 554 /// The string literal should not contain any `NUL` bytes. 555 /// 556 /// # Examples 557 /// 558 /// ``` 559 /// # use kernel::c_str; 560 /// # use kernel::str::CStr; 561 /// const MY_CSTR: &CStr = c_str!("My awesome CStr!"); 562 /// ``` 563 #[macro_export] 564 macro_rules! c_str { 565 ($str:expr) => {{ 566 const S: &str = concat!($str, "\0"); 567 const C: &$crate::str::CStr = match $crate::str::CStr::from_bytes_with_nul(S.as_bytes()) { 568 Ok(v) => v, 569 Err(_) => panic!("string contains interior NUL"), 570 }; 571 C 572 }}; 573 } 574 575 #[cfg(test)] 576 #[expect(clippy::items_after_test_module)] 577 mod tests { 578 use super::*; 579 580 struct String(CString); 581 582 impl String { from_fmt(args: fmt::Arguments<'_>) -> Self583 fn from_fmt(args: fmt::Arguments<'_>) -> Self { 584 String(CString::try_from_fmt(args).unwrap()) 585 } 586 } 587 588 impl Deref for String { 589 type Target = str; 590 deref(&self) -> &str591 fn deref(&self) -> &str { 592 self.0.to_str().unwrap() 593 } 594 } 595 596 macro_rules! format { 597 ($($f:tt)*) => ({ 598 &*String::from_fmt(kernel::fmt!($($f)*)) 599 }) 600 } 601 602 const ALL_ASCII_CHARS: &str = 603 "\\x01\\x02\\x03\\x04\\x05\\x06\\x07\\x08\\x09\\x0a\\x0b\\x0c\\x0d\\x0e\\x0f\ 604 \\x10\\x11\\x12\\x13\\x14\\x15\\x16\\x17\\x18\\x19\\x1a\\x1b\\x1c\\x1d\\x1e\\x1f \ 605 !\"#$%&'()*+,-./0123456789:;<=>?@\ 606 ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\\x7f\ 607 \\x80\\x81\\x82\\x83\\x84\\x85\\x86\\x87\\x88\\x89\\x8a\\x8b\\x8c\\x8d\\x8e\\x8f\ 608 \\x90\\x91\\x92\\x93\\x94\\x95\\x96\\x97\\x98\\x99\\x9a\\x9b\\x9c\\x9d\\x9e\\x9f\ 609 \\xa0\\xa1\\xa2\\xa3\\xa4\\xa5\\xa6\\xa7\\xa8\\xa9\\xaa\\xab\\xac\\xad\\xae\\xaf\ 610 \\xb0\\xb1\\xb2\\xb3\\xb4\\xb5\\xb6\\xb7\\xb8\\xb9\\xba\\xbb\\xbc\\xbd\\xbe\\xbf\ 611 \\xc0\\xc1\\xc2\\xc3\\xc4\\xc5\\xc6\\xc7\\xc8\\xc9\\xca\\xcb\\xcc\\xcd\\xce\\xcf\ 612 \\xd0\\xd1\\xd2\\xd3\\xd4\\xd5\\xd6\\xd7\\xd8\\xd9\\xda\\xdb\\xdc\\xdd\\xde\\xdf\ 613 \\xe0\\xe1\\xe2\\xe3\\xe4\\xe5\\xe6\\xe7\\xe8\\xe9\\xea\\xeb\\xec\\xed\\xee\\xef\ 614 \\xf0\\xf1\\xf2\\xf3\\xf4\\xf5\\xf6\\xf7\\xf8\\xf9\\xfa\\xfb\\xfc\\xfd\\xfe\\xff"; 615 616 #[test] test_cstr_to_str()617 fn test_cstr_to_str() { 618 let good_bytes = b"\xf0\x9f\xa6\x80\0"; 619 let checked_cstr = CStr::from_bytes_with_nul(good_bytes).unwrap(); 620 let checked_str = checked_cstr.to_str().unwrap(); 621 assert_eq!(checked_str, ""); 622 } 623 624 #[test] 625 #[should_panic] test_cstr_to_str_panic()626 fn test_cstr_to_str_panic() { 627 let bad_bytes = b"\xc3\x28\0"; 628 let checked_cstr = CStr::from_bytes_with_nul(bad_bytes).unwrap(); 629 checked_cstr.to_str().unwrap(); 630 } 631 632 #[test] test_cstr_as_str_unchecked()633 fn test_cstr_as_str_unchecked() { 634 let good_bytes = b"\xf0\x9f\x90\xA7\0"; 635 let checked_cstr = CStr::from_bytes_with_nul(good_bytes).unwrap(); 636 // SAFETY: The contents come from a string literal which contains valid UTF-8. 637 let unchecked_str = unsafe { checked_cstr.as_str_unchecked() }; 638 assert_eq!(unchecked_str, ""); 639 } 640 641 #[test] test_cstr_display()642 fn test_cstr_display() { 643 let hello_world = CStr::from_bytes_with_nul(b"hello, world!\0").unwrap(); 644 assert_eq!(format!("{hello_world}"), "hello, world!"); 645 let non_printables = CStr::from_bytes_with_nul(b"\x01\x09\x0a\0").unwrap(); 646 assert_eq!(format!("{non_printables}"), "\\x01\\x09\\x0a"); 647 let non_ascii = CStr::from_bytes_with_nul(b"d\xe9j\xe0 vu\0").unwrap(); 648 assert_eq!(format!("{non_ascii}"), "d\\xe9j\\xe0 vu"); 649 let good_bytes = CStr::from_bytes_with_nul(b"\xf0\x9f\xa6\x80\0").unwrap(); 650 assert_eq!(format!("{good_bytes}"), "\\xf0\\x9f\\xa6\\x80"); 651 } 652 653 #[test] test_cstr_display_all_bytes()654 fn test_cstr_display_all_bytes() { 655 let mut bytes: [u8; 256] = [0; 256]; 656 // fill `bytes` with [1..=255] + [0] 657 for i in u8::MIN..=u8::MAX { 658 bytes[i as usize] = i.wrapping_add(1); 659 } 660 let cstr = CStr::from_bytes_with_nul(&bytes).unwrap(); 661 assert_eq!(format!("{cstr}"), ALL_ASCII_CHARS); 662 } 663 664 #[test] test_cstr_debug()665 fn test_cstr_debug() { 666 let hello_world = CStr::from_bytes_with_nul(b"hello, world!\0").unwrap(); 667 assert_eq!(format!("{hello_world:?}"), "\"hello, world!\""); 668 let non_printables = CStr::from_bytes_with_nul(b"\x01\x09\x0a\0").unwrap(); 669 assert_eq!(format!("{non_printables:?}"), "\"\\x01\\x09\\x0a\""); 670 let non_ascii = CStr::from_bytes_with_nul(b"d\xe9j\xe0 vu\0").unwrap(); 671 assert_eq!(format!("{non_ascii:?}"), "\"d\\xe9j\\xe0 vu\""); 672 let good_bytes = CStr::from_bytes_with_nul(b"\xf0\x9f\xa6\x80\0").unwrap(); 673 assert_eq!(format!("{good_bytes:?}"), "\"\\xf0\\x9f\\xa6\\x80\""); 674 } 675 676 #[test] test_bstr_display()677 fn test_bstr_display() { 678 let hello_world = BStr::from_bytes(b"hello, world!"); 679 assert_eq!(format!("{hello_world}"), "hello, world!"); 680 let escapes = BStr::from_bytes(b"_\t_\n_\r_\\_\'_\"_"); 681 assert_eq!(format!("{escapes}"), "_\\t_\\n_\\r_\\_'_\"_"); 682 let others = BStr::from_bytes(b"\x01"); 683 assert_eq!(format!("{others}"), "\\x01"); 684 let non_ascii = BStr::from_bytes(b"d\xe9j\xe0 vu"); 685 assert_eq!(format!("{non_ascii}"), "d\\xe9j\\xe0 vu"); 686 let good_bytes = BStr::from_bytes(b"\xf0\x9f\xa6\x80"); 687 assert_eq!(format!("{good_bytes}"), "\\xf0\\x9f\\xa6\\x80"); 688 } 689 690 #[test] test_bstr_debug()691 fn test_bstr_debug() { 692 let hello_world = BStr::from_bytes(b"hello, world!"); 693 assert_eq!(format!("{hello_world:?}"), "\"hello, world!\""); 694 let escapes = BStr::from_bytes(b"_\t_\n_\r_\\_\'_\"_"); 695 assert_eq!(format!("{escapes:?}"), "\"_\\t_\\n_\\r_\\\\_'_\\\"_\""); 696 let others = BStr::from_bytes(b"\x01"); 697 assert_eq!(format!("{others:?}"), "\"\\x01\""); 698 let non_ascii = BStr::from_bytes(b"d\xe9j\xe0 vu"); 699 assert_eq!(format!("{non_ascii:?}"), "\"d\\xe9j\\xe0 vu\""); 700 let good_bytes = BStr::from_bytes(b"\xf0\x9f\xa6\x80"); 701 assert_eq!(format!("{good_bytes:?}"), "\"\\xf0\\x9f\\xa6\\x80\""); 702 } 703 } 704 705 /// Allows formatting of [`fmt::Arguments`] into a raw buffer. 706 /// 707 /// It does not fail if callers write past the end of the buffer so that they can calculate the 708 /// size required to fit everything. 709 /// 710 /// # Invariants 711 /// 712 /// The memory region between `pos` (inclusive) and `end` (exclusive) is valid for writes if `pos` 713 /// is less than `end`. 714 pub(crate) struct RawFormatter { 715 // Use `usize` to use `saturating_*` functions. 716 beg: usize, 717 pos: usize, 718 end: usize, 719 } 720 721 impl RawFormatter { 722 /// Creates a new instance of [`RawFormatter`] with an empty buffer. new() -> Self723 fn new() -> Self { 724 // INVARIANT: The buffer is empty, so the region that needs to be writable is empty. 725 Self { 726 beg: 0, 727 pos: 0, 728 end: 0, 729 } 730 } 731 732 /// Creates a new instance of [`RawFormatter`] with the given buffer pointers. 733 /// 734 /// # Safety 735 /// 736 /// If `pos` is less than `end`, then the region between `pos` (inclusive) and `end` 737 /// (exclusive) must be valid for writes for the lifetime of the returned [`RawFormatter`]. from_ptrs(pos: *mut u8, end: *mut u8) -> Self738 pub(crate) unsafe fn from_ptrs(pos: *mut u8, end: *mut u8) -> Self { 739 // INVARIANT: The safety requirements guarantee the type invariants. 740 Self { 741 beg: pos as _, 742 pos: pos as _, 743 end: end as _, 744 } 745 } 746 747 /// Creates a new instance of [`RawFormatter`] with the given buffer. 748 /// 749 /// # Safety 750 /// 751 /// The memory region starting at `buf` and extending for `len` bytes must be valid for writes 752 /// for the lifetime of the returned [`RawFormatter`]. from_buffer(buf: *mut u8, len: usize) -> Self753 pub(crate) unsafe fn from_buffer(buf: *mut u8, len: usize) -> Self { 754 let pos = buf as usize; 755 // INVARIANT: We ensure that `end` is never less then `buf`, and the safety requirements 756 // guarantees that the memory region is valid for writes. 757 Self { 758 pos, 759 beg: pos, 760 end: pos.saturating_add(len), 761 } 762 } 763 764 /// Returns the current insert position. 765 /// 766 /// N.B. It may point to invalid memory. pos(&self) -> *mut u8767 pub(crate) fn pos(&self) -> *mut u8 { 768 self.pos as _ 769 } 770 771 /// Returns the number of bytes written to the formatter. bytes_written(&self) -> usize772 pub(crate) fn bytes_written(&self) -> usize { 773 self.pos - self.beg 774 } 775 } 776 777 impl fmt::Write for RawFormatter { write_str(&mut self, s: &str) -> fmt::Result778 fn write_str(&mut self, s: &str) -> fmt::Result { 779 // `pos` value after writing `len` bytes. This does not have to be bounded by `end`, but we 780 // don't want it to wrap around to 0. 781 let pos_new = self.pos.saturating_add(s.len()); 782 783 // Amount that we can copy. `saturating_sub` ensures we get 0 if `pos` goes past `end`. 784 let len_to_copy = core::cmp::min(pos_new, self.end).saturating_sub(self.pos); 785 786 if len_to_copy > 0 { 787 // SAFETY: If `len_to_copy` is non-zero, then we know `pos` has not gone past `end` 788 // yet, so it is valid for write per the type invariants. 789 unsafe { 790 core::ptr::copy_nonoverlapping( 791 s.as_bytes().as_ptr(), 792 self.pos as *mut u8, 793 len_to_copy, 794 ) 795 }; 796 } 797 798 self.pos = pos_new; 799 Ok(()) 800 } 801 } 802 803 /// Allows formatting of [`fmt::Arguments`] into a raw buffer. 804 /// 805 /// Fails if callers attempt to write more than will fit in the buffer. 806 pub(crate) struct Formatter(RawFormatter); 807 808 impl Formatter { 809 /// Creates a new instance of [`Formatter`] with the given buffer. 810 /// 811 /// # Safety 812 /// 813 /// The memory region starting at `buf` and extending for `len` bytes must be valid for writes 814 /// for the lifetime of the returned [`Formatter`]. from_buffer(buf: *mut u8, len: usize) -> Self815 pub(crate) unsafe fn from_buffer(buf: *mut u8, len: usize) -> Self { 816 // SAFETY: The safety requirements of this function satisfy those of the callee. 817 Self(unsafe { RawFormatter::from_buffer(buf, len) }) 818 } 819 } 820 821 impl Deref for Formatter { 822 type Target = RawFormatter; 823 deref(&self) -> &Self::Target824 fn deref(&self) -> &Self::Target { 825 &self.0 826 } 827 } 828 829 impl fmt::Write for Formatter { write_str(&mut self, s: &str) -> fmt::Result830 fn write_str(&mut self, s: &str) -> fmt::Result { 831 self.0.write_str(s)?; 832 833 // Fail the request if we go past the end of the buffer. 834 if self.0.pos > self.0.end { 835 Err(fmt::Error) 836 } else { 837 Ok(()) 838 } 839 } 840 } 841 842 /// An owned string that is guaranteed to have exactly one `NUL` byte, which is at the end. 843 /// 844 /// Used for interoperability with kernel APIs that take C strings. 845 /// 846 /// # Invariants 847 /// 848 /// The string is always `NUL`-terminated and contains no other `NUL` bytes. 849 /// 850 /// # Examples 851 /// 852 /// ``` 853 /// use kernel::{str::CString, fmt}; 854 /// 855 /// let s = CString::try_from_fmt(fmt!("{}{}{}", "abc", 10, 20))?; 856 /// assert_eq!(s.as_bytes_with_nul(), "abc1020\0".as_bytes()); 857 /// 858 /// let tmp = "testing"; 859 /// let s = CString::try_from_fmt(fmt!("{tmp}{}", 123))?; 860 /// assert_eq!(s.as_bytes_with_nul(), "testing123\0".as_bytes()); 861 /// 862 /// // This fails because it has an embedded `NUL` byte. 863 /// let s = CString::try_from_fmt(fmt!("a\0b{}", 123)); 864 /// assert_eq!(s.is_ok(), false); 865 /// # Ok::<(), kernel::error::Error>(()) 866 /// ``` 867 pub struct CString { 868 buf: KVec<u8>, 869 } 870 871 impl CString { 872 /// Creates an instance of [`CString`] from the given formatted arguments. try_from_fmt(args: fmt::Arguments<'_>) -> Result<Self, Error>873 pub fn try_from_fmt(args: fmt::Arguments<'_>) -> Result<Self, Error> { 874 // Calculate the size needed (formatted string plus `NUL` terminator). 875 let mut f = RawFormatter::new(); 876 f.write_fmt(args)?; 877 f.write_str("\0")?; 878 let size = f.bytes_written(); 879 880 // Allocate a vector with the required number of bytes, and write to it. 881 let mut buf = KVec::with_capacity(size, GFP_KERNEL)?; 882 // SAFETY: The buffer stored in `buf` is at least of size `size` and is valid for writes. 883 let mut f = unsafe { Formatter::from_buffer(buf.as_mut_ptr(), size) }; 884 f.write_fmt(args)?; 885 f.write_str("\0")?; 886 887 // SAFETY: The number of bytes that can be written to `f` is bounded by `size`, which is 888 // `buf`'s capacity. The contents of the buffer have been initialised by writes to `f`. 889 unsafe { buf.set_len(f.bytes_written()) }; 890 891 // Check that there are no `NUL` bytes before the end. 892 // SAFETY: The buffer is valid for read because `f.bytes_written()` is bounded by `size` 893 // (which the minimum buffer size) and is non-zero (we wrote at least the `NUL` terminator) 894 // so `f.bytes_written() - 1` doesn't underflow. 895 let ptr = unsafe { bindings::memchr(buf.as_ptr().cast(), 0, f.bytes_written() - 1) }; 896 if !ptr.is_null() { 897 return Err(EINVAL); 898 } 899 900 // INVARIANT: We wrote the `NUL` terminator and checked above that no other `NUL` bytes 901 // exist in the buffer. 902 Ok(Self { buf }) 903 } 904 } 905 906 impl Deref for CString { 907 type Target = CStr; 908 deref(&self) -> &Self::Target909 fn deref(&self) -> &Self::Target { 910 // SAFETY: The type invariants guarantee that the string is `NUL`-terminated and that no 911 // other `NUL` bytes exist. 912 unsafe { CStr::from_bytes_with_nul_unchecked(self.buf.as_slice()) } 913 } 914 } 915 916 impl DerefMut for CString { deref_mut(&mut self) -> &mut Self::Target917 fn deref_mut(&mut self) -> &mut Self::Target { 918 // SAFETY: A `CString` is always NUL-terminated and contains no other 919 // NUL bytes. 920 unsafe { CStr::from_bytes_with_nul_unchecked_mut(self.buf.as_mut_slice()) } 921 } 922 } 923 924 impl<'a> TryFrom<&'a CStr> for CString { 925 type Error = AllocError; 926 try_from(cstr: &'a CStr) -> Result<CString, AllocError>927 fn try_from(cstr: &'a CStr) -> Result<CString, AllocError> { 928 let mut buf = KVec::new(); 929 930 buf.extend_from_slice(cstr.as_bytes_with_nul(), GFP_KERNEL)?; 931 932 // INVARIANT: The `CStr` and `CString` types have the same invariants for 933 // the string data, and we copied it over without changes. 934 Ok(CString { buf }) 935 } 936 } 937 938 impl fmt::Debug for CString { fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result939 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { 940 fmt::Debug::fmt(&**self, f) 941 } 942 } 943 944 /// A convenience alias for [`core::format_args`]. 945 #[macro_export] 946 macro_rules! fmt { 947 ($($f:tt)*) => ( core::format_args!($($f)*) ) 948 } 949