Skip to main content

http/uri/
path.rs

1use std::convert::TryFrom;
2use std::str::FromStr;
3use std::{cmp, fmt, hash, str};
4
5use bytes::Bytes;
6
7use super::{ErrorKind, InvalidUri};
8use crate::byte_str::ByteStr;
9
/// Represents the path component of a URI
///
/// The value keeps the path and, when one is present, the `?` separator and
/// the query string together in a single buffer.
#[derive(Clone)]
pub struct PathAndQuery {
    /// Backing text: the path, optionally followed by `?` and the query.
    pub(super) data: ByteStr,
    /// Byte offset of the `?` separator inside `data`, or `NONE` when the
    /// value has no query.
    pub(super) query: u16,
}

// Sentinel stored in `PathAndQuery::query` to mean "no `?` present".
const NONE: u16 = u16::MAX;
18
19impl PathAndQuery {
20    // Not public while `bytes` is unstable.
21    pub(super) fn from_shared(mut src: Bytes) -> Result<Self, InvalidUri> {
22        let Scanned {
23            query,
24            fragment,
25            is_maybe_not_utf8,
26        } = scan_path_and_query(&src)?;
27
28        if let Some(i) = fragment {
29            src.truncate(i as usize);
30        }
31
32        let data = if is_maybe_not_utf8 {
33            ByteStr::from_utf8(src).map_err(|_| ErrorKind::InvalidUriChar)?
34        } else {
35            unsafe { ByteStr::from_utf8_unchecked(src) }
36        };
37
38        Ok(PathAndQuery { data, query })
39    }
40
41    /// Convert a `PathAndQuery` from a static string.
42    ///
43    /// This function will not perform any copying, however the string is
44    /// checked to ensure that it is valid.
45    ///
46    /// # Panics
47    ///
48    /// This function panics if the argument is an invalid path and query.
49    ///
50    /// # Examples
51    ///
52    /// ```
53    /// # use http::uri::*;
54    /// let v = PathAndQuery::from_static("/hello?world");
55    ///
56    /// assert_eq!(v.path(), "/hello");
57    /// assert_eq!(v.query(), Some("world"));
58    /// ```
59    #[inline]
60    pub const fn from_static(src: &'static str) -> Self {
61        match scan_path_and_query(src.as_bytes()) {
62            Ok(Scanned {
63                query,
64                fragment: None,
65                is_maybe_not_utf8: false,
66            }) => PathAndQuery {
67                data: ByteStr::from_static(src),
68                query,
69            },
70            // Yes, we reject fragments and non-utf8
71            _ => panic!("static str is not valid path"),
72        }
73    }
74
75    /// Attempt to convert a `Bytes` buffer to a `PathAndQuery`.
76    ///
77    /// This will try to prevent a copy if the type passed is the type used
78    /// internally, and will copy the data if it is not.
79    pub fn from_maybe_shared<T>(src: T) -> Result<Self, InvalidUri>
80    where
81        T: AsRef<[u8]> + 'static,
82    {
83        if_downcast_into!(T, Bytes, src, {
84            return PathAndQuery::from_shared(src);
85        });
86
87        PathAndQuery::try_from(src.as_ref())
88    }
89
90    pub(super) fn empty() -> Self {
91        PathAndQuery {
92            data: ByteStr::new(),
93            query: NONE,
94        }
95    }
96
97    pub(super) fn slash() -> Self {
98        PathAndQuery {
99            data: ByteStr::from_static("/"),
100            query: NONE,
101        }
102    }
103
104    pub(super) fn star() -> Self {
105        PathAndQuery {
106            data: ByteStr::from_static("*"),
107            query: NONE,
108        }
109    }
110
111    /// Returns the path component
112    ///
113    /// The path component is **case sensitive**.
114    ///
115    /// ```notrust
116    /// abc://username:password@example.com:123/path/data?key=value&key2=value2#fragid1
117    ///                                        |--------|
118    ///                                             |
119    ///                                           path
120    /// ```
121    ///
122    /// If the URI is `*` then the path component is equal to `*`.
123    ///
124    /// # Examples
125    ///
126    /// ```
127    /// # use http::uri::*;
128    ///
129    /// let path_and_query: PathAndQuery = "/hello/world".parse().unwrap();
130    ///
131    /// assert_eq!(path_and_query.path(), "/hello/world");
132    /// ```
133    #[inline]
134    pub fn path(&self) -> &str {
135        let ret = if self.query == NONE {
136            &self.data[..]
137        } else {
138            &self.data[..self.query as usize]
139        };
140
141        if ret.is_empty() {
142            return "/";
143        }
144
145        ret
146    }
147
148    /// Returns the query string component
149    ///
150    /// The query component contains non-hierarchical data that, along with data
151    /// in the path component, serves to identify a resource within the scope of
152    /// the URI's scheme and naming authority (if any). The query component is
153    /// indicated by the first question mark ("?") character and terminated by a
154    /// number sign ("#") character or by the end of the URI.
155    ///
156    /// ```notrust
157    /// abc://username:password@example.com:123/path/data?key=value&key2=value2#fragid1
158    ///                                                   |-------------------|
159    ///                                                             |
160    ///                                                           query
161    /// ```
162    ///
163    /// # Examples
164    ///
165    /// With a query string component
166    ///
167    /// ```
168    /// # use http::uri::*;
169    /// let path_and_query: PathAndQuery = "/hello/world?key=value&foo=bar".parse().unwrap();
170    ///
171    /// assert_eq!(path_and_query.query(), Some("key=value&foo=bar"));
172    /// ```
173    ///
174    /// Without a query string component
175    ///
176    /// ```
177    /// # use http::uri::*;
178    /// let path_and_query: PathAndQuery = "/hello/world".parse().unwrap();
179    ///
180    /// assert!(path_and_query.query().is_none());
181    /// ```
182    #[inline]
183    pub fn query(&self) -> Option<&str> {
184        if self.query == NONE {
185            None
186        } else {
187            let i = self.query + 1;
188            Some(&self.data[i as usize..])
189        }
190    }
191
192    /// Returns the path and query as a string component.
193    ///
194    /// # Examples
195    ///
196    /// With a query string component
197    ///
198    /// ```
199    /// # use http::uri::*;
200    /// let path_and_query: PathAndQuery = "/hello/world?key=value&foo=bar".parse().unwrap();
201    ///
202    /// assert_eq!(path_and_query.as_str(), "/hello/world?key=value&foo=bar");
203    /// ```
204    ///
205    /// Without a query string component
206    ///
207    /// ```
208    /// # use http::uri::*;
209    /// let path_and_query: PathAndQuery = "/hello/world".parse().unwrap();
210    ///
211    /// assert_eq!(path_and_query.as_str(), "/hello/world");
212    /// ```
213    #[inline]
214    pub fn as_str(&self) -> &str {
215        let ret = &self.data[..];
216        if ret.is_empty() {
217            return "/";
218        }
219        ret
220    }
221}
222
223impl TryFrom<&[u8]> for PathAndQuery {
224    type Error = InvalidUri;
225    #[inline]
226    fn try_from(s: &[u8]) -> Result<Self, Self::Error> {
227        PathAndQuery::from_shared(Bytes::copy_from_slice(s))
228    }
229}
230
231impl TryFrom<&str> for PathAndQuery {
232    type Error = InvalidUri;
233    #[inline]
234    fn try_from(s: &str) -> Result<Self, Self::Error> {
235        TryFrom::try_from(s.as_bytes())
236    }
237}
238
239impl TryFrom<Vec<u8>> for PathAndQuery {
240    type Error = InvalidUri;
241    #[inline]
242    fn try_from(vec: Vec<u8>) -> Result<Self, Self::Error> {
243        PathAndQuery::from_shared(vec.into())
244    }
245}
246
247impl TryFrom<String> for PathAndQuery {
248    type Error = InvalidUri;
249    #[inline]
250    fn try_from(s: String) -> Result<Self, Self::Error> {
251        PathAndQuery::from_shared(s.into())
252    }
253}
254
255impl TryFrom<&String> for PathAndQuery {
256    type Error = InvalidUri;
257    #[inline]
258    fn try_from(s: &String) -> Result<Self, Self::Error> {
259        TryFrom::try_from(s.as_bytes())
260    }
261}
262
263impl FromStr for PathAndQuery {
264    type Err = InvalidUri;
265    #[inline]
266    fn from_str(s: &str) -> Result<Self, InvalidUri> {
267        TryFrom::try_from(s)
268    }
269}
270
271impl fmt::Debug for PathAndQuery {
272    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
273        fmt::Display::fmt(self, f)
274    }
275}
276
277impl fmt::Display for PathAndQuery {
278    fn fmt(&self, fmt: &mut fmt::Formatter<'_>) -> fmt::Result {
279        if !self.data.is_empty() {
280            match self.data.as_bytes()[0] {
281                b'/' | b'*' => write!(fmt, "{}", &self.data[..]),
282                _ => write!(fmt, "/{}", &self.data[..]),
283            }
284        } else {
285            write!(fmt, "/")
286        }
287    }
288}
289
290impl hash::Hash for PathAndQuery {
291    fn hash<H: hash::Hasher>(&self, state: &mut H) {
292        self.data.hash(state);
293    }
294}
295
296// ===== PartialEq / PartialOrd =====
297
298impl PartialEq for PathAndQuery {
299    #[inline]
300    fn eq(&self, other: &PathAndQuery) -> bool {
301        self.data == other.data
302    }
303}
304
305impl Eq for PathAndQuery {}
306
307impl PartialEq<str> for PathAndQuery {
308    #[inline]
309    fn eq(&self, other: &str) -> bool {
310        self.as_str() == other
311    }
312}
313
314impl PartialEq<PathAndQuery> for &str {
315    #[inline]
316    fn eq(&self, other: &PathAndQuery) -> bool {
317        self == &other.as_str()
318    }
319}
320
321impl PartialEq<&str> for PathAndQuery {
322    #[inline]
323    fn eq(&self, other: &&str) -> bool {
324        self.as_str() == *other
325    }
326}
327
328impl PartialEq<PathAndQuery> for str {
329    #[inline]
330    fn eq(&self, other: &PathAndQuery) -> bool {
331        self == other.as_str()
332    }
333}
334
335impl PartialEq<String> for PathAndQuery {
336    #[inline]
337    fn eq(&self, other: &String) -> bool {
338        self.as_str() == other.as_str()
339    }
340}
341
342impl PartialEq<PathAndQuery> for String {
343    #[inline]
344    fn eq(&self, other: &PathAndQuery) -> bool {
345        self.as_str() == other.as_str()
346    }
347}
348
349impl PartialOrd for PathAndQuery {
350    #[inline]
351    fn partial_cmp(&self, other: &PathAndQuery) -> Option<cmp::Ordering> {
352        self.as_str().partial_cmp(other.as_str())
353    }
354}
355
356impl PartialOrd<str> for PathAndQuery {
357    #[inline]
358    fn partial_cmp(&self, other: &str) -> Option<cmp::Ordering> {
359        self.as_str().partial_cmp(other)
360    }
361}
362
363impl PartialOrd<PathAndQuery> for str {
364    #[inline]
365    fn partial_cmp(&self, other: &PathAndQuery) -> Option<cmp::Ordering> {
366        self.partial_cmp(other.as_str())
367    }
368}
369
370impl PartialOrd<&str> for PathAndQuery {
371    #[inline]
372    fn partial_cmp(&self, other: &&str) -> Option<cmp::Ordering> {
373        self.as_str().partial_cmp(*other)
374    }
375}
376
377impl PartialOrd<PathAndQuery> for &str {
378    #[inline]
379    fn partial_cmp(&self, other: &PathAndQuery) -> Option<cmp::Ordering> {
380        self.partial_cmp(&other.as_str())
381    }
382}
383
384impl PartialOrd<String> for PathAndQuery {
385    #[inline]
386    fn partial_cmp(&self, other: &String) -> Option<cmp::Ordering> {
387        self.as_str().partial_cmp(other.as_str())
388    }
389}
390
391impl PartialOrd<PathAndQuery> for String {
392    #[inline]
393    fn partial_cmp(&self, other: &PathAndQuery) -> Option<cmp::Ordering> {
394        self.as_str().partial_cmp(other.as_str())
395    }
396}
397
398// Scanner implementation that is `const fn`, usable by both `from_static`
399// and `from_shared`.
400// =====
401
// Result of a successful `scan_path_and_query` pass over the input bytes.
struct Scanned {
    // Offset of the first `?`, or `NONE` when the input has no `?`.
    query: u16,
    // Offset of the `#` that begins a fragment, when one was found. Scanning
    // stops at that byte.
    fragment: Option<u16>,
    // Set when at least one byte in 0x80..=0xFF was seen, meaning the input
    // is not plain ASCII and still needs a real UTF-8 check.
    is_maybe_not_utf8: bool,
}
407
408const fn scan_path_and_query(bytes: &[u8]) -> Result<Scanned, ErrorKind> {
409    let mut i = 0;
410    let mut query = NONE;
411    let mut fragment = None;
412
413    let mut is_maybe_not_utf8 = false;
414
415    if bytes.is_empty() {
416        return Err(ErrorKind::Empty);
417    }
418
419    if bytes.len() == 1 && bytes[0] == b'*' {
420        return Ok(Scanned {
421            query,
422            fragment,
423            is_maybe_not_utf8: false,
424        });
425    }
426
427    if !matches!(bytes[0], b'/' | b'?' | b'#') {
428        return Err(ErrorKind::PathDoesNotStartWithSlash);
429    }
430
431    while i < bytes.len() {
432        // See https://url.spec.whatwg.org/#path-state
433        match bytes[i] {
434            b'?' => {
435                debug_assert!(query == NONE);
436                query = i as u16;
437                i += 1;
438                break;
439            }
440            b'#' => {
441                fragment = Some(i as u16);
442                break;
443            }
444
445            // This is the range of bytes that don't need to be
446            // percent-encoded in the path. If it should have been
447            // percent-encoded, then error.
448            #[rustfmt::skip]
449            0x21 |
450            0x24..=0x3B |
451            0x3D |
452            0x40..=0x5F |
453            0x61..=0x7A |
454            0x7C |
455            0x7E => {}
456
457            // potentially utf8, might not, should check
458            0x80..=0xFF => {
459                is_maybe_not_utf8 = true;
460            }
461
462            // These are code points that are supposed to be
463            // percent-encoded in the path but there are clients
464            // out there sending them as is and httparse accepts
465            // to parse those requests, so they are allowed here
466            // for parity.
467            //
468            // For reference, those are code points that are used
469            // to send requests with JSON directly embedded in
470            // the URI path. Yes, those things happen for real.
471            #[rustfmt::skip]
472            b'"' |
473            b'{' | b'}' => {}
474
475            _ => return Err(ErrorKind::InvalidUriChar),
476        }
477        i += 1;
478    }
479
480    // query ...
481    if query != NONE {
482        while i < bytes.len() {
483            match bytes[i] {
484                // While queries *should* be percent-encoded, most
485                // bytes are actually allowed...
486                // See https://url.spec.whatwg.org/#query-state
487                //
488                // Allowed: 0x21 / 0x24 - 0x3B / 0x3D / 0x3F - 0x7E
489                #[rustfmt::skip]
490                0x21 |
491                0x24..=0x3B |
492                0x3D |
493                0x3F..=0x7E => {}
494
495                0x80..=0xFF => {
496                    is_maybe_not_utf8 = true;
497                }
498
499                b'#' => {
500                    fragment = Some(i as u16);
501                    break;
502                }
503
504                _ => return Err(ErrorKind::InvalidUriChar),
505            }
506            i += 1;
507        }
508    }
509
510    Ok(Scanned {
511        query,
512        fragment,
513        is_maybe_not_utf8,
514    })
515}
516
#[cfg(test)]
mod tests {
    //! Unit tests for `PathAndQuery`: comparisons, accessors and the
    //! validation rules applied while parsing.

    use super::*;

    #[test]
    fn equal_to_self_of_same_path() {
        let p1: PathAndQuery = "/hello/world&foo=bar".parse().unwrap();
        let p2: PathAndQuery = "/hello/world&foo=bar".parse().unwrap();
        assert_eq!(p1, p2);
        assert_eq!(p2, p1);
    }

    #[test]
    fn not_equal_to_self_of_different_path() {
        let p1: PathAndQuery = "/hello/world&foo=bar".parse().unwrap();
        let p2: PathAndQuery = "/world&foo=bar".parse().unwrap();
        assert_ne!(p1, p2);
        assert_ne!(p2, p1);
    }

    #[test]
    fn equates_with_a_str() {
        let path_and_query: PathAndQuery = "/hello/world&foo=bar".parse().unwrap();
        assert_eq!(&path_and_query, "/hello/world&foo=bar");
        assert_eq!("/hello/world&foo=bar", &path_and_query);
        assert_eq!(path_and_query, "/hello/world&foo=bar");
        assert_eq!("/hello/world&foo=bar", path_and_query);
    }

    #[test]
    fn not_equal_with_a_str_of_a_different_path() {
        let path_and_query: PathAndQuery = "/hello/world&foo=bar".parse().unwrap();
        // as a reference
        assert_ne!(&path_and_query, "/hello&foo=bar");
        assert_ne!("/hello&foo=bar", &path_and_query);
        // without reference
        assert_ne!(path_and_query, "/hello&foo=bar");
        assert_ne!("/hello&foo=bar", path_and_query);
    }

    #[test]
    fn equates_with_a_string() {
        let path_and_query: PathAndQuery = "/hello/world&foo=bar".parse().unwrap();
        assert_eq!(path_and_query, "/hello/world&foo=bar".to_string());
        assert_eq!("/hello/world&foo=bar".to_string(), path_and_query);
    }

    #[test]
    fn not_equal_with_a_string_of_a_different_path() {
        let path_and_query: PathAndQuery = "/hello/world&foo=bar".parse().unwrap();
        assert_ne!(path_and_query, "/hello&foo=bar".to_string());
        assert_ne!("/hello&foo=bar".to_string(), path_and_query);
    }

    #[test]
    fn compares_to_self() {
        let p1: PathAndQuery = "/a/world&foo=bar".parse().unwrap();
        let p2: PathAndQuery = "/b/world&foo=bar".parse().unwrap();
        assert!(p1 < p2);
        assert!(p2 > p1);
    }

    #[test]
    fn compares_with_a_str() {
        let path_and_query: PathAndQuery = "/b/world&foo=bar".parse().unwrap();
        // by ref
        assert!(&path_and_query < "/c/world&foo=bar");
        assert!("/c/world&foo=bar" > &path_and_query);
        assert!(&path_and_query > "/a/world&foo=bar");
        assert!("/a/world&foo=bar" < &path_and_query);

        // by val
        assert!(path_and_query < "/c/world&foo=bar");
        assert!("/c/world&foo=bar" > path_and_query);
        assert!(path_and_query > "/a/world&foo=bar");
        assert!("/a/world&foo=bar" < path_and_query);
    }

    #[test]
    fn compares_with_a_string() {
        let path_and_query: PathAndQuery = "/b/world&foo=bar".parse().unwrap();
        assert!(path_and_query < "/c/world&foo=bar".to_string());
        assert!("/c/world&foo=bar".to_string() > path_and_query);
        assert!(path_and_query > "/a/world&foo=bar".to_string());
        assert!("/a/world&foo=bar".to_string() < path_and_query);
    }

    #[test]
    fn ignores_valid_percent_encodings() {
        assert_eq!("/a%20b", pq("/a%20b?r=1").path());
        assert_eq!("qr=%31", pq("/a/b?qr=%31").query().unwrap());
    }

    #[test]
    fn ignores_invalid_percent_encodings() {
        assert_eq!("/a%%b", pq("/a%%b?r=1").path());
        assert_eq!("/aaa%", pq("/aaa%").path());
        assert_eq!("/aaa%", pq("/aaa%?r=1").path());
        assert_eq!("/aa%2", pq("/aa%2").path());
        assert_eq!("/aa%2", pq("/aa%2?r=1").path());
        assert_eq!("qr=%3", pq("/a/b?qr=%3").query().unwrap());
    }

    #[test]
    fn allow_utf8_in_path() {
        assert_eq!("/🍕", pq("/🍕").path());
    }

    #[test]
    fn allow_utf8_in_query() {
        assert_eq!(Some("pizza=🍕"), pq("/test?pizza=🍕").query());
    }

    #[test]
    fn rejects_invalid_utf8_in_path() {
        PathAndQuery::try_from(&[b'/', 0xFF][..]).expect_err("reject invalid utf8");
    }

    #[test]
    fn rejects_invalid_utf8_in_query() {
        PathAndQuery::try_from(&[b'/', b'a', b'?', 0xFF][..]).expect_err("reject invalid utf8");
    }

    #[test]
    fn rejects_empty_string() {
        PathAndQuery::try_from("").expect_err("reject empty str");
    }

    #[test]
    fn requires_starting_with_slash() {
        PathAndQuery::try_from("sneaky").expect_err("reject missing slash");
    }

    #[test]
    fn rejects_del_in_path() {
        PathAndQuery::try_from(&[b'/', 0x7F][..]).expect_err("reject DEL");
    }

    #[test]
    fn rejects_del_in_query() {
        PathAndQuery::try_from(&[b'/', b'a', b'?', 0x7F][..]).expect_err("reject DEL");
    }

    #[test]
    fn json_is_fine() {
        assert_eq!(
            r#"/{"bread":"baguette"}"#,
            pq(r#"/{"bread":"baguette"}"#).path()
        );
    }

    /// Parses `s`, panicking with the offending input and the parse error
    /// when it is rejected.
    ///
    /// The panic message is only formatted on failure, so successful parses
    /// do not pay for building it.
    fn pq(s: &str) -> PathAndQuery {
        s.parse()
            .unwrap_or_else(|err| panic!("parsing {}: {:?}", s, err))
    }
}