trust_dns_proto/rr/domain/
label.rs

1// Copyright 2015-2018 Benjamin Fry <benjaminfry@me.com>
2//
3// Licensed under the Apache License, Version 2.0, <LICENSE-APACHE or
4// http://apache.org/licenses/LICENSE-2.0> or the MIT license <LICENSE-MIT or
5// http://opensource.org/licenses/MIT>, at your option. This file may not be
6// copied, modified, or distributed except according to those terms.
7
8//! Labels are used as the internal components of a Name.
9//!
10//! A label is stored internally as ascii, where all unicode characters are converted to punycode internally.
11
12#[allow(clippy::useless_attribute)]
13#[allow(unused)]
14#[allow(deprecated)]
15use std::ascii::AsciiExt;
16use std::borrow::Borrow;
17use std::cmp::{Ordering, PartialEq};
18use std::fmt::{self, Debug, Display, Formatter, Write};
19use std::hash::{Hash, Hasher};
20use tinyvec::TinyVec;
21
22use idna;
23use tracing::debug;
24
25use crate::error::*;
26
/// Raw bytes of the wildcard label, `*`.
const WILDCARD: &[u8] = b"*";
/// Byte prefix that marks a label as IDNA (punycode) encoded; `Display` attempts an IDNA
/// decode only for labels starting with it.
const IDNA_PREFIX: &[u8] = b"xn--";
29
/// A single label, i.e. one component of a domain name.
///
/// Labels are always stored as ASCII, unicode characters must be encoded with punycode.
///
/// `Eq` is derived here, while `PartialEq`, `Ord` and `Hash` are implemented by hand in this
/// file so that two labels differing only in ASCII case compare (and hash) as equal.
#[derive(Clone, Eq)]
pub struct Label(TinyVec<[u8; 24]>);
33
34impl Label {
35    /// These must only be ASCII, with unicode encoded to PunyCode, or other such transformation.
36    ///
37    /// This uses the bytes as raw ascii values, with nothing escaped on the wire.
38    /// Generally users should use `from_str` or `from_ascii`
39    pub fn from_raw_bytes(bytes: &[u8]) -> ProtoResult<Self> {
40        // Check for label validity.
41        // RFC 2181, Section 11 "Name Syntax".
42        // > The length of any one label is limited to between 1 and 63 octets.
43        if bytes.is_empty() {
44            return Err("Label requires a minimum length of 1".into());
45        }
46        if bytes.len() > 63 {
47            return Err(ProtoErrorKind::LabelBytesTooLong(bytes.len()).into());
48        };
49        Ok(Self(TinyVec::from(bytes)))
50    }
51
52    /// Translates this string into IDNA safe name, encoding to punycode as necessary.
53    pub fn from_utf8(s: &str) -> ProtoResult<Self> {
54        if s.as_bytes() == WILDCARD {
55            return Ok(Self::wildcard());
56        }
57
58        // special case for SRV type records
59        if s.starts_with('_') {
60            return Self::from_ascii(s);
61        }
62
63        match idna::Config::default()
64            .use_std3_ascii_rules(true)
65            .transitional_processing(true)
66            // length don't exceding 63 is done in `from_ascii`
67            // on puny encoded string
68            // idna error are opaque so early failure is not possible.
69            .verify_dns_length(false)
70            .to_ascii(s)
71        {
72            Ok(puny) => Self::from_ascii(&puny),
73            e => Err(format!("Label contains invalid characters: {e:?}").into()),
74        }
75    }
76
77    /// Takes the ascii string and returns a new label.
78    ///
79    /// This will return an Error if the label is not an ascii string
80    pub fn from_ascii(s: &str) -> ProtoResult<Self> {
81        if s.len() > 63 {
82            return Err(ProtoErrorKind::LabelBytesTooLong(s.len()).into());
83        }
84
85        if s.as_bytes() == WILDCARD {
86            return Ok(Self::wildcard());
87        }
88
89        if !s.is_empty()
90            && s.is_ascii()
91            && s.chars().take(1).all(|c| is_safe_ascii(c, true, false))
92            && s.chars().skip(1).all(|c| is_safe_ascii(c, false, false))
93        {
94            Self::from_raw_bytes(s.as_bytes())
95        } else {
96            Err(format!("Malformed label: {s}").into())
97        }
98    }
99
100    /// Returns a new Label of the Wildcard, i.e. "*"
101    pub fn wildcard() -> Self {
102        Self(TinyVec::from(WILDCARD))
103    }
104
105    /// Converts this label to lowercase
106    pub fn to_lowercase(&self) -> Self {
107        // TODO: replace case conversion when (ascii_ctype #39658) stabilizes
108        if let Some((idx, _)) = self
109            .0
110            .iter()
111            .enumerate()
112            .find(|&(_, c)| *c != c.to_ascii_lowercase())
113        {
114            let mut lower_label: Vec<u8> = self.0.to_vec();
115            lower_label[idx..].make_ascii_lowercase();
116            Self(TinyVec::from(lower_label.as_slice()))
117        } else {
118            self.clone()
119        }
120    }
121
122    /// Returns true if this label is the wildcard, '*', label
123    pub fn is_wildcard(&self) -> bool {
124        self.as_bytes() == WILDCARD
125    }
126
127    /// Returns the lenght in bytes of this label
128    pub fn len(&self) -> usize {
129        self.0.len()
130    }
131
132    /// True if the label contains no characters
133    pub fn is_empty(&self) -> bool {
134        self.0.is_empty()
135    }
136
137    /// Returns the raw bytes of the label, this is good for writing to the wire.
138    ///
139    /// See [`Display`] for presentation version (unescaped from punycode, etc)
140    pub fn as_bytes(&self) -> &[u8] {
141        &self.0
142    }
143
144    /// Performs the equivalence operation disregarding case
145    pub fn eq_ignore_ascii_case(&self, other: &Self) -> bool {
146        self.0.eq_ignore_ascii_case(&other.0)
147    }
148
149    /// compares with the other label, ignoring case
150    pub fn cmp_with_f<F: LabelCmp>(&self, other: &Self) -> Ordering {
151        let s = self.0.iter();
152        let o = other.0.iter();
153
154        for (s, o) in s.zip(o) {
155            match F::cmp_u8(*s, *o) {
156                Ordering::Equal => continue,
157                not_eq => return not_eq,
158            }
159        }
160
161        self.0.len().cmp(&other.0.len())
162    }
163
164    /// Performs the conversion to utf8 from IDNA as necessary, see `fmt` for more details
165    pub fn to_utf8(&self) -> String {
166        format!("{self}")
167    }
168
169    /// Converts this label to safe ascii, escaping characters as necessary
170    ///
171    /// If this is an IDNA, punycode, label, then the xn-- prefix will be maintained as ascii
172    pub fn to_ascii(&self) -> String {
173        let mut ascii = String::with_capacity(self.as_bytes().len());
174
175        self.write_ascii(&mut ascii)
176            .expect("should never fail to write a new string");
177        ascii
178    }
179
180    /// Writes this label to safe ascii, escaping characters as necessary
181    pub fn write_ascii<W: Write>(&self, f: &mut W) -> Result<(), fmt::Error> {
182        // We can't guarantee that the same input will always translate to the same output
183        fn escape_non_ascii<W: Write>(
184            byte: u8,
185            f: &mut W,
186            is_first: bool,
187        ) -> Result<(), fmt::Error> {
188            let to_triple_escape = |ch: u8| format!("\\{ch:03o}");
189            let to_single_escape = |ch: char| format!("\\{ch}");
190
191            match char::from(byte) {
192                c if is_safe_ascii(c, is_first, true) => f.write_char(c)?,
193                // it's not a control and is printable as well as inside the standard ascii range
194                c if byte > b'\x20' && byte < b'\x7f' => f.write_str(&to_single_escape(c))?,
195                _ => f.write_str(&to_triple_escape(byte))?,
196            }
197
198            Ok(())
199        }
200
201        // traditional ascii case...
202        let mut chars = self.as_bytes().iter();
203        if let Some(ch) = chars.next() {
204            escape_non_ascii(*ch, f, true)?;
205        }
206
207        for ch in chars {
208            escape_non_ascii(*ch, f, false)?;
209        }
210
211        Ok(())
212    }
213}
214
215impl AsRef<[u8]> for Label {
216    fn as_ref(&self) -> &[u8] {
217        self.as_bytes()
218    }
219}
220
221impl Borrow<[u8]> for Label {
222    fn borrow(&self) -> &[u8] {
223        &self.0
224    }
225}
226
/// Decides whether `c` may appear unescaped in a label.
///
/// * `is_first` - `c` is the first character of the label; `-` is rejected there and `*`
///   (wildcard) is only accepted there.
/// * `for_encoding` - when set, `.` is rejected (it has to be escaped on output); when unset,
///   `.` is accepted, needed for things like email addresses.
///
/// ASCII letters, ASCII digits and `_` (SRV like labels) are always accepted; anything
/// outside of ASCII is always rejected.
fn is_safe_ascii(c: char, is_first: bool, for_encoding: bool) -> bool {
    if c.is_ascii_alphanumeric() {
        return true;
    }

    match c {
        '_' => true,
        '-' => !is_first,
        '*' => is_first,
        '.' => !for_encoding,
        _ => false,
    }
}
238
239impl Display for Label {
240    /// outputs characters in a safe string manner.
241    ///
242    /// if the string is punycode, i.e. starts with `xn--`, otherwise it translates to a safe ascii string
243    ///   escaping characters as necessary.
244    fn fmt(&self, f: &mut Formatter<'_>) -> Result<(), fmt::Error> {
245        if self.as_bytes().starts_with(IDNA_PREFIX) {
246            // this should never be outside the ascii codes...
247            let label = String::from_utf8_lossy(self.borrow());
248            let (label, e) = idna::Config::default()
249                .use_std3_ascii_rules(false)
250                .transitional_processing(false)
251                .verify_dns_length(false)
252                .to_unicode(&label);
253
254            if e.is_ok() {
255                return f.write_str(&label);
256            } else {
257                debug!(
258                    "xn-- prefixed string did not translate via IDNA properly: {:?}",
259                    e
260                )
261            }
262        }
263
264        // it wasn't known to be utf8
265        self.write_ascii(f)
266    }
267}
268
269impl Debug for Label {
270    fn fmt(&self, f: &mut Formatter<'_>) -> Result<(), fmt::Error> {
271        let label = String::from_utf8_lossy(self.borrow());
272        f.write_str(&label)
273    }
274}
275
276impl PartialEq<Self> for Label {
277    fn eq(&self, other: &Self) -> bool {
278        self.eq_ignore_ascii_case(other)
279    }
280}
281
282impl PartialOrd<Self> for Label {
283    fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
284        Some(self.cmp(other))
285    }
286}
287
288impl Ord for Label {
289    fn cmp(&self, other: &Self) -> Ordering {
290        self.cmp_with_f::<CaseInsensitive>(other)
291    }
292}
293
294impl Hash for Label {
295    fn hash<H>(&self, state: &mut H)
296    where
297        H: Hasher,
298    {
299        for b in self.borrow() as &[u8] {
300            state.write_u8(b.to_ascii_lowercase());
301        }
302    }
303}
304
/// Strategy for ordering the individual bytes of two labels, allowing both case sensitive
/// and case insensitive label comparisons.
pub trait LabelCmp {
    /// Orders `lhs` relative to `rhs`; this should mimic the cmp method from [`PartialOrd`]
    fn cmp_u8(lhs: u8, rhs: u8) -> Ordering;
}
310
311/// For case sensitive comparisons
312pub(super) struct CaseSensitive;
313
314impl LabelCmp for CaseSensitive {
315    fn cmp_u8(l: u8, r: u8) -> Ordering {
316        l.cmp(&r)
317    }
318}
319
320/// For case insensitive comparisons
321pub(super) struct CaseInsensitive;
322
323impl LabelCmp for CaseInsensitive {
324    fn cmp_u8(l: u8, r: u8) -> Ordering {
325        l.to_ascii_lowercase().cmp(&r.to_ascii_lowercase())
326    }
327}
328
329/// Conversion into a Label
330pub trait IntoLabel: Sized {
331    /// Convert this into Label
332    fn into_label(self) -> ProtoResult<Label>;
333}
334
335impl<'a> IntoLabel for &'a Label {
336    fn into_label(self) -> ProtoResult<Label> {
337        Ok(self.clone())
338    }
339}
340
341impl IntoLabel for Label {
342    fn into_label(self) -> ProtoResult<Label> {
343        Ok(self)
344    }
345}
346
347impl<'a> IntoLabel for &'a str {
348    fn into_label(self) -> ProtoResult<Label> {
349        Label::from_utf8(self)
350    }
351}
352
353impl IntoLabel for String {
354    fn into_label(self) -> ProtoResult<Label> {
355        Label::from_utf8(&self)
356    }
357}
358
359impl<'a> IntoLabel for &'a [u8] {
360    fn into_label(self) -> ProtoResult<Label> {
361        Label::from_raw_bytes(self)
362    }
363}
364
365impl IntoLabel for Vec<u8> {
366    fn into_label(self) -> ProtoResult<Label> {
367        Label::from_raw_bytes(&self)
368    }
369}
370
#[cfg(test)]
mod tests {
    #![allow(clippy::dbg_macro, clippy::print_stdout)]

    use super::*;

    /// An all-ASCII label that is longer than the 63 octet limit.
    const TOO_LONG_ASCII: &str =
        "alwaystestingcodewithatoolonglabeltoolongtofitin63bytesisagoodhabit";

    /// Shorthand for a label built from raw bytes that are known to be acceptable.
    fn raw(bytes: &[u8]) -> Label {
        Label::from_raw_bytes(bytes).unwrap()
    }

    /// Shorthand for a label built from an ascii string that is known to be acceptable.
    fn ascii(s: &str) -> Label {
        Label::from_ascii(s).unwrap()
    }

    /// Shorthand for a label built from a utf8 string that is known to be acceptable.
    fn utf8(s: &str) -> Label {
        Label::from_utf8(s).unwrap()
    }

    #[test]
    fn test_encoding() {
        assert_eq!(utf8("abc"), raw(b"abc"));
        // case insensitive, this works...
        assert_eq!(utf8("ABC"), raw(b"ABC"));
        assert_eq!(utf8("🦀"), raw(b"xn--zs9h"));
        assert_eq!(utf8("rust-🦀-icon"), raw(b"xn--rust--icon-9447i"));
        assert_eq!(ascii("ben.fry"), raw(b"ben.fry"));

        let crab = utf8("🦀");
        assert_eq!(crab.to_utf8(), "🦀");
        assert_eq!(crab.to_ascii(), "xn--zs9h");
    }

    /// Asserts that `error` is a `LabelBytesTooLong` error reporting exactly `len` bytes.
    fn assert_panic_label_too_long(error: ProtoResult<Label>, len: usize) {
        // poor man debug since ProtoResult don't implement Partial Eq due to ssl errors.
        eprintln!("{error:?}");
        assert!(error.is_err());

        let proto_error = error.unwrap_err();
        match *proto_error.kind() {
            ProtoErrorKind::LabelBytesTooLong(reported) => {
                if reported != len {
                    panic!(
                        "LabelTooLongError error don't report expected size {} of the label provided.",
                        reported
                    )
                }
            }
            _ => panic!("Should have returned a LabelTooLongError"),
        }
    }

    #[test]
    fn test_label_too_long_ascii_with_utf8() {
        assert_panic_label_too_long(Label::from_utf8(TOO_LONG_ASCII), TOO_LONG_ASCII.len());
    }

    #[test]
    fn test_label_too_long_utf8_puny_emoji() {
        // emoji only: the punycode form is over the limit, 64 octets are expected to be reported
        let emoji_case = "💜🦀🏖️🖥️😨🚀✨🤖💚🦾🦿😱😨✉️👺📚💻🗓️🤡🦀😈🚀💀⚡🦄";
        assert_panic_label_too_long(Label::from_utf8(emoji_case), 64);
    }

    #[test]
    fn test_label_too_long_utf8_puny_emoji_mixed() {
        // mixed scripts and emoji: the punycode form is 65 octets long
        // Something international to say
        // "Hello I like autumn coffee 🦀 interesting"
        let emoji_case = "こんにちは-I-mögen-jesień-café-🦀-intéressant";
        assert_panic_label_too_long(Label::from_utf8(emoji_case), 65);
    }

    #[test]
    fn test_label_too_long_utf8_puny_mixed() {
        // edge case 64 octet long.
        // xn--testwithalonglabelinutf8tofitin63octetsisagoodhabit-f2106cqb
        let edge_case = "🦀testwithalonglabelinutf8tofitin63octetsisagoodhabit🦀";
        assert_panic_label_too_long(Label::from_utf8(edge_case), 64);
    }

    #[test]
    fn test_label_too_long_raw() {
        let too_long = TOO_LONG_ASCII.as_bytes();
        assert_panic_label_too_long(Label::from_raw_bytes(too_long), too_long.len());
    }

    #[test]
    fn test_label_too_long_ascii() {
        assert_panic_label_too_long(Label::from_ascii(TOO_LONG_ASCII), TOO_LONG_ASCII.len());
    }

    #[test]
    fn test_decoding() {
        assert_eq!(raw(b"abc").to_string(), "abc");
        assert_eq!(raw(b"xn--zs9h").to_string(), "🦀");
        assert_eq!(raw(b"xn--rust--icon-9447i").to_string(), "rust-🦀-icon");
    }

    #[test]
    fn test_from_ascii_adversial_utf8() {
        assert!(Label::from_ascii("🦀").is_err());
    }

    #[test]
    fn test_to_lowercase() {
        // the stored case is preserved until an explicit conversion
        assert_ne!(ascii("ABC").to_string(), "abc");
        assert_ne!(ascii("abcDEF").to_string(), "abcdef");

        assert_eq!(ascii("ABC").to_lowercase().to_string(), "abc");
        assert_eq!(ascii("abcDEF").to_lowercase().to_string(), "abcdef");
    }

    #[test]
    fn test_to_cmp_f() {
        let insensitive =
            |l: &str, r: &str| ascii(l).cmp_with_f::<CaseInsensitive>(&ascii(r));
        let sensitive = |l: &str, r: &str| ascii(l).cmp_with_f::<CaseSensitive>(&ascii(r));

        assert_eq!(insensitive("ABC", "abc"), Ordering::Equal);
        assert_eq!(insensitive("abcDEF", "abcdef"), Ordering::Equal);
        assert_eq!(sensitive("ABC", "abc"), Ordering::Less);
        assert_eq!(sensitive("abcDEF", "abcdef"), Ordering::Less);
    }

    #[test]
    fn test_partial_cmp() {
        // every left hand label must order strictly before its right hand label
        let comparisons = [
            (raw(b"yljkjljk"), raw(b"Z")),
            (raw(b"Z"), raw(b"zABC")),
            (raw(&[1]), raw(b"*")),
            (raw(b"*"), raw(&[200])),
        ];

        for (left, right) in &comparisons {
            println!("left: {left}, right: {right}");
            assert_eq!(left.cmp(right), Ordering::Less);
        }
    }

    #[test]
    fn test_is_wildcard() {
        assert!(raw(b"*").is_wildcard());
        assert!(ascii("*").is_wildcard());
        assert!(utf8("*").is_wildcard());
        assert!(!raw(b"abc").is_wildcard());
    }

    #[test]
    fn test_ascii_escape() {
        assert_eq!(raw(&[0o200]).to_string(), "\\200");
        assert_eq!(raw(&[0o001]).to_string(), "\\001");
        assert_eq!(ascii(".").to_ascii(), "\\.");
        assert_eq!(ascii("ben.fry").to_string(), "ben\\.fry");
        assert_eq!(raw(&[0o200]).to_ascii(), "\\200");
    }
}