diff --git a/src/libcollections/str.rs b/src/libcollections/str.rs index 86fcac3e4b8a0..6fbfe37f13c0d 100644 --- a/src/libcollections/str.rs +++ b/src/libcollections/str.rs @@ -419,6 +419,13 @@ pub trait StrExt: Index { self.chars().flat_map(|c| c.escape_default()).collect() } + /// Escapes each char in `s` with `char::escape_control`. + #[unstable(feature = "collections", + reason = "return type may change to be an iterator")] + fn escape_control(&self) -> String { + self.chars().flat_map(|c| c.escape_control()).collect() + } + /// Escapes each char in `s` with `char::escape_unicode`. #[unstable(feature = "collections", reason = "return type may change to be an iterator")] @@ -2240,6 +2247,17 @@ mod tests { String::from_str("\\u{1d4ea}\\r")); } + #[test] + fn test_escape_control() { + assert_eq!("abc".escape_control(), String::from_str("abc")); + assert_eq!("öbµ".escape_control(), String::from_str("öbµ")); + assert_eq!("a c".escape_control(), String::from_str("a c")); + assert_eq!("\r\n\t".escape_control(), String::from_str("\\r\\n\\t")); + assert_eq!("'\"\\".escape_control(), String::from_str("\\'\\\"\\\\")); + assert_eq!("\u{100}".escape_control(), + String::from_str("\u{100}")); + } + #[test] fn test_total_ord() { "1234".cmp("123") == Greater; diff --git a/src/libcore/char.rs b/src/libcore/char.rs index 8e27ae1cea970..89037f8d2a8b1 100644 --- a/src/libcore/char.rs +++ b/src/libcore/char.rs @@ -265,6 +265,41 @@ pub trait CharExt { #[stable(feature = "rust1", since = "1.0.0")] fn escape_default(self) -> EscapeDefault; + /// Escapes all C0 and C1 control characters (ISO 646 (ASCII), ISO 6429). + /// This method is called when printing a string with `{:?}`. The exact + /// rules for escaping are: + /// + /// * Tab, CR and LF are escaped as '\t', '\r' and '\n' respectively. + /// * Single-quote, double-quote and backslash chars are backslash- + /// escaped. 
+ /// * Any other C0 ([0x00, 0x1f]) and C1 ([0x80, 0x9f]) chars are escaped + /// with the Rust unicode syntax: `\u{NNNN}`. + /// * Any other chars are not escaped. + /// + /// Note: 0x7f (delete) is often considered a control character, but is not + /// escaped! + /// + /// # Examples + /// + /// ``` + /// for c in "ä\n☃".chars() { + /// for i in c.escape_control() { + /// print!("{}", i); + /// } + /// println!(""); + /// } + /// ``` + /// + /// This prints: + /// + /// ```text + /// ä + /// \n + /// ☃ + /// ``` + #[stable(feature = "rust1", since = "1.0.0")] + fn escape_control(self) -> EscapeDefault; + /// Returns the number of bytes this character would need if encoded in UTF-8. /// /// # Examples @@ -394,6 +429,25 @@ impl CharExt for char { EscapeDefault { state: init_state } } + #[stable(feature = "rust1", since = "1.0.0")] + fn escape_control(self) -> EscapeDefault { + let init_state = match self { + // Backslash-escape special control character. + '\t' => EscapeDefaultState::Backslash('t'), + '\r' => EscapeDefaultState::Backslash('r'), + '\n' => EscapeDefaultState::Backslash('n'), + '\\' => EscapeDefaultState::Backslash('\\'), + '\'' => EscapeDefaultState::Backslash('\''), + '"' => EscapeDefaultState::Backslash('"'), + // Unicode-escape other C0 or C1 control character. + '\x00' ... '\x1f' | '\u{80}' ... '\u{9f}' => + EscapeDefaultState::Unicode(self.escape_unicode()), + // Don't escape anything else. 
+ _ => EscapeDefaultState::Char(self), + }; + EscapeDefault { state: init_state } + } + #[inline] #[stable(feature = "rust1", since = "1.0.0")] fn len_utf8(self) -> usize { diff --git a/src/libcore/fmt/mod.rs b/src/libcore/fmt/mod.rs index 9544fbaa55b25..0d1f76b575b28 100644 --- a/src/libcore/fmt/mod.rs +++ b/src/libcore/fmt/mod.rs @@ -660,7 +660,7 @@ impl Display for bool { impl Debug for str { fn fmt(&self, f: &mut Formatter) -> Result { try!(write!(f, "\"")); - for c in self.chars().flat_map(|c| c.escape_default()) { + for c in self.chars().flat_map(|c| c.escape_control()) { try!(write!(f, "{}", c)); } write!(f, "\"") @@ -679,7 +679,7 @@ impl Debug for char { fn fmt(&self, f: &mut Formatter) -> Result { use char::CharExt; try!(write!(f, "'")); - for c in self.escape_default() { + for c in self.escape_control() { try!(write!(f, "{}", c)); } write!(f, "'") diff --git a/src/libcoretest/char.rs b/src/libcoretest/char.rs index 32dc6440b1326..9afdafd19bb57 100644 --- a/src/libcoretest/char.rs +++ b/src/libcoretest/char.rs @@ -144,6 +144,37 @@ fn test_escape_default() { assert_eq!(s, "\\u{1d4b6}"); } +#[test] +fn test_escape_control() { + fn string(c: char) -> String { + c.escape_control().collect() + } + let s = string('\n'); + assert_eq!(s, "\\n"); + let s = string('\r'); + assert_eq!(s, "\\r"); + let s = string('\''); + assert_eq!(s, "\\'"); + let s = string('"'); + assert_eq!(s, "\\\""); + let s = string(' '); + assert_eq!(s, " "); + let s = string('a'); + assert_eq!(s, "a"); + let s = string('ä'); + assert_eq!(s, "ä"); + let s = string('~'); + assert_eq!(s, "~"); + let s = string('\x00'); + assert_eq!(s, "\\u{0}"); + let s = string('\x1f'); + assert_eq!(s, "\\u{1f}"); + let s = string('\u{ff}'); + assert_eq!(s, "\u{ff}"); + let s = string('\u{11b}'); + assert_eq!(s, "\u{11b}"); +} + #[test] fn test_escape_unicode() { fn string(c: char) -> String { c.escape_unicode().collect() } diff --git a/src/libunicode/u_char.rs b/src/libunicode/u_char.rs index 
c0f45ca4d7247..0fca0f8632a9e 100644 --- a/src/libunicode/u_char.rs +++ b/src/libunicode/u_char.rs @@ -77,6 +77,41 @@ pub trait CharExt { #[stable(feature = "rust1", since = "1.0.0")] fn escape_default(self) -> char::EscapeDefault; + /// Escapes all C0 and C1 control characters (ISO 646 (ASCII), ISO 6429). + /// This method is called when printing a string with `{:?}`. The exact + /// rules for escaping are: + /// + /// * Tab, CR and LF are escaped as '\t', '\r' and '\n' respectively. + /// * Single-quote, double-quote and backslash chars are backslash- + /// escaped. + /// * Any other C0 ([0x00, 0x1f]) and C1 ([0x80, 0x9f]) chars are escaped + /// with the Rust unicode syntax: `\u{NNNN}`. + /// * Any other chars are not escaped. + /// + /// Note: 0x7f (delete) is often considered a control character, but is not + /// escaped! + /// + /// # Examples + /// + /// ``` + /// for c in "ä\n☃".chars() { + /// for i in c.escape_control() { + /// print!("{}", i); + /// } + /// println!(""); + /// } + /// ``` + /// + /// This prints: + /// + /// ```text + /// ä + /// \n + /// ☃ + /// ``` + #[stable(feature = "rust1", since = "1.0.0")] + fn escape_control(self) -> char::EscapeDefault; + /// Returns the amount of bytes this character would need if encoded in /// UTF-8. 
#[stable(feature = "rust1", since = "1.0.0")] @@ -232,6 +267,8 @@ impl CharExt for char { #[stable(feature = "rust1", since = "1.0.0")] fn escape_default(self) -> char::EscapeDefault { C::escape_default(self) } #[stable(feature = "rust1", since = "1.0.0")] + fn escape_control(self) -> char::EscapeDefault { C::escape_control(self) } + #[stable(feature = "rust1", since = "1.0.0")] fn len_utf8(self) -> usize { C::len_utf8(self) } #[stable(feature = "rust1", since = "1.0.0")] fn len_utf16(self) -> usize { C::len_utf16(self) } diff --git a/src/test/run-pass/ifmt.rs b/src/test/run-pass/ifmt.rs index 62b8ff528a5e2..8e24da2f882f0 100644 --- a/src/test/run-pass/ifmt.rs +++ b/src/test/run-pass/ifmt.rs @@ -62,7 +62,7 @@ pub fn main() { t!(format!("{}", '☃'), "☃"); t!(format!("{}", 10), "10"); t!(format!("{}", 10_usize), "10"); - t!(format!("{:?}", '☃'), "'\\u{2603}'"); + t!(format!("{:?}", '☃'), "'☃'"); t!(format!("{:?}", 10), "10"); t!(format!("{:?}", 10_usize), "10"); t!(format!("{:?}", "true"), "\"true\"");