7

The following code does not compile:

use std::str::Chars;

struct Chunks {
    remaining: Chars, // E0106 is reported here: `Chars` is written without its lifetime parameter
}

impl Chunks {
    // Intended constructor: take ownership of `s` and store an iterator over
    // its characters. `s.chars()` borrows from `s`, and `Chars` cannot outlive
    // the string it was created from, so this cannot work as written.
    fn new(s: String) -> Self {
        Chunks {
            remaining: s.chars(),
        }
    }
}

The error is:

error[E0106]: missing lifetime specifier
 --> src/main.rs:4:16
  |
4 |     remaining: Chars,
  |                ^^^^^ expected lifetime parameter

Chars doesn't own the characters it iterates over and it can't outlive the &str or String it was created from.

Is there an owned version of Chars that does not need a lifetime parameter or do I have to keep a Vec<char> and an index myself?

Challenger5
  • 869
  • 5
  • 25

5 Answers

7

std::vec::IntoIter is an owned version of every iterator, in a sense.

use std::vec::IntoIter;

/// Character source that owns its data outright.
///
/// The characters are decoded once into a `Vec<char>` and the vector's
/// by-value iterator is stored, so no lifetime parameter is required.
struct Chunks {
    remaining: IntoIter<char>,
}

impl Chunks {
    /// Consumes `s`, buffers every `char` it contains, and returns a
    /// `Chunks` that iterates over that buffer.
    fn new(s: String) -> Self {
        let buffered: Vec<char> = s.chars().collect();
        let remaining = buffered.into_iter();
        Chunks { remaining }
    }
}

Playground link

The downside is an additional allocation and some space overhead (every character is stored as a full `char`), but I am not aware of a ready-made owning iterator for your specific case.

red75prime
  • 3,461
  • 1
  • 15
  • 20
5

Ouroboros

You can use the ouroboros crate to create a self-referential struct containing the String and a Chars iterator:

use ouroboros::self_referencing; // 0.4.1
use std::str::Chars;

// Self-referential pairing of an owned `String` with a `Chars` iterator over it.
// The impls below use `with`, `with_mut` and `IntoCharsBuilder`, none of which are
// written out here; presumably they are generated by `#[self_referencing]`.
#[self_referencing]
pub struct IntoChars {
    // The owned string being iterated.
    string: String,
    // Iterator that borrows from the `string` field (declared via `#[borrows(string)]`).
    #[borrows(string)]
    chars: Chars<'this>,
}

// All these implementations are based on what `Chars` implements itself

// Iterator impl that forwards every method to the wrapped `chars` field, which
// is reached through the `with` / `with_mut` accessors.
impl Iterator for IntoChars {
    type Item = char;

    // Advance the wrapped iterator by one character.
    #[inline]
    fn next(&mut self) -> Option<Self::Item> {
        self.with_mut(|me| me.chars.next())
    }

    // Forward `count` to the wrapped iterator instead of using the default,
    // which would call `next` repeatedly.
    #[inline]
    fn count(mut self) -> usize {
        self.with_mut(|me| me.chars.count())
    }

    // Report the wrapped iterator's bounds; only shared access is needed here.
    #[inline]
    fn size_hint(&self) -> (usize, Option<usize>) {
        self.with(|me| me.chars.size_hint())
    }

    // Forward `last` to the wrapped iterator.
    #[inline]
    fn last(mut self) -> Option<Self::Item> {
        self.with_mut(|me| me.chars.last())
    }
}

// Allows iterating from the back by forwarding to the wrapped `chars` field.
impl DoubleEndedIterator for IntoChars {
    // Take one character from the end of the remaining input.
    #[inline]
    fn next_back(&mut self) -> Option<Self::Item> {
        self.with_mut(|me| me.chars.next_back())
    }
}

// Marker impl: `next` forwards to `Chars`, which itself implements `FusedIterator`.
impl std::iter::FusedIterator for IntoChars {}

// And an extension trait for convenience

/// Extension trait adding an `into_chars` method that consumes the receiver.
trait IntoCharsExt {
    /// Consumes `self` and returns an `IntoChars` iterator.
    fn into_chars(self) -> IntoChars;
}

impl IntoCharsExt for String {
    // Moves the string into an `IntoChars`. The `chars_builder` closure is given
    // `s` and returns `s.chars()`, which fills the `chars` field.
    // NOTE(review): `IntoCharsBuilder` is not defined in this snippet — presumably
    // generated by `#[self_referencing]`; confirm against the ouroboros docs.
    fn into_chars(self) -> IntoChars {
        IntoCharsBuilder {
            string: self,
            chars_builder: |s| s.chars(),
        }
        .build()
    }
}

See also:

Rental

You can use the rental crate to create a self-referential struct containing the String and a Chars iterator:

#[macro_use]
extern crate rental;

// Everything inside `rental!` is passed to the rental crate's macro. The
// `into_chars` module declares `IntoChars`, which pairs an owned `String`
// with a `Chars` iterator over it.
rental! {
    mod into_chars {
        pub use std::str::Chars;

        #[rental]
        pub struct IntoChars {
            // The owned string being iterated.
            string: String,
            // Iterator over `string`; presumably `'string` is the lifetime of the
            // borrow of the field of that name — confirm against the rental docs.
            chars: Chars<'string>,
        }
    }
}

use into_chars::IntoChars;

// All these implementations are based on what `Chars` implements itself

// Iterator impl that forwards every method to the wrapped `Chars`, which the
// `rent` / `rent_mut` accessors pass to the closure.
impl Iterator for IntoChars {
    type Item = char;

    // Advance the wrapped iterator by one character.
    #[inline]
    fn next(&mut self) -> Option<Self::Item> {
        self.rent_mut(|chars| chars.next())
    }

    // Forward `count` to the wrapped iterator instead of using the default,
    // which would call `next` repeatedly.
    #[inline]
    fn count(mut self) -> usize {
        self.rent_mut(|chars| chars.count())
    }

    // Report the wrapped iterator's bounds; only shared access is needed here.
    #[inline]
    fn size_hint(&self) -> (usize, Option<usize>) {
        self.rent(|chars| chars.size_hint())
    }

    // Forward `last` to the wrapped iterator.
    #[inline]
    fn last(mut self) -> Option<Self::Item> {
        self.rent_mut(|chars| chars.last())
    }
}

// Allows iterating from the back by forwarding to the wrapped `Chars`.
impl DoubleEndedIterator for IntoChars {
    // Take one character from the end of the remaining input.
    #[inline]
    fn next_back(&mut self) -> Option<Self::Item> {
        self.rent_mut(|chars| chars.next_back())
    }
}

// Marker impl: `next` forwards to `Chars`, which itself implements `FusedIterator`.
impl std::iter::FusedIterator for IntoChars {}

// And an extension trait for convenience 

/// Extension trait adding an `into_chars` method that consumes the receiver.
trait IntoCharsExt {
    /// Consumes `self` and returns an `IntoChars` iterator.
    fn into_chars(self) -> IntoChars;
}

impl IntoCharsExt for String {
    // Moves the string into an `IntoChars`. The closure is given `s` and returns
    // `s.chars()`, the iterator stored next to the string.
    // NOTE(review): `IntoChars::new` is not defined in this snippet — presumably
    // generated by `rental!`; confirm against the rental docs.
    fn into_chars(self) -> IntoChars {
        IntoChars::new(self, |s| s.chars())
    }
}

See also:

Shepmaster
  • 326,504
  • 69
  • 892
  • 1,159
3

There's also the owned-chars crate, which

provides an extension trait for String with two methods, into_chars and into_char_indices. These methods parallel String::chars and String::char_indices, but the iterators they create consume the String instead of borrowing it.

Adrian Heine
  • 3,851
  • 2
  • 27
  • 42
2

You could implement your own iterator, or wrap Chars like this (with just one small unsafe block):

// Deriving `Clone` would be buggy. With `Rc<>`/`Arc<>` instead of `Box<>` it
// would work though.
/// An owning counterpart to `std::str::Chars`.
///
/// The value keeps its string-like storage alive in a `Box` and iterates over
/// the `str` that storage exposes, so no lifetime parameter is needed.
struct OwnedChars {
    // Struct fields are dropped in the order they are declared,
    // see https://stackoverflow.com/a/41056727/1478356
    // With `Chars` it probably doesn't matter, but for good style `inner`
    // should be dropped before `storage`.

    // The `'static` lifetime is a lie and must not "escape" the lifetime of
    // the struct.
    inner: ::std::str::Chars<'static>,
    // We need to box anyway to be sure the inner reference doesn't move when
    // moving the storage, so we can erase the type as well.
    // `struct OwnedChars<S: AsRef<str>> { ..., storage: Box<S> }` should work too.
    //
    // `dyn` is spelled out: the bare `Box<AsRef<str>>` form is deprecated and
    // rejected by the 2021 edition.
    storage: Box<dyn AsRef<str>>,
}

impl OwnedChars {
    /// Takes ownership of `s` and returns an iterator over the characters of
    /// the `str` it exposes through `AsRef<str>`.
    pub fn new<S: AsRef<str> + 'static>(s: S) -> Self {
        let storage: Box<dyn AsRef<str>> = Box::new(s);
        let raw_ptr: *const str = storage.as_ref().as_ref();
        // SAFETY: `raw_ptr` was just obtained from the value inside `storage`.
        // That value lives in a `Box` owned by the returned struct, is never
        // mutated through it, and is dropped only after `inner` (field
        // declaration order). The `'static` reference is never exposed:
        // `as_str` re-borrows it for the lifetime of `&self`.
        // NOTE(review): this relies on `AsRef::as_ref` returning a `str` that
        // stays valid while the boxed value is neither mutated nor dropped.
        let ptr: &'static str = unsafe { &*raw_ptr };
        OwnedChars {
            inner: ptr.chars(),
            storage,
        }
    }

    /// Returns the part of the string that has not been iterated yet,
    /// borrowed for as long as `self` is borrowed.
    pub fn as_str(&self) -> &str {
        self.inner.as_str()
    }
}

/// Iterates over the characters of the owned string by forwarding to the
/// wrapped `Chars`.
///
/// `size_hint`, `count` and `last` are forwarded as well: the default
/// `size_hint` would report `(0, None)`, and the defaults for the other two
/// would call `next` repeatedly instead of using what `Chars` provides.
impl Iterator for OwnedChars {
    type Item = char;

    /// Returns the next character, or `None` once the string is exhausted.
    #[inline]
    fn next(&mut self) -> Option<Self::Item> {
        self.inner.next()
    }

    /// Bounds on the number of characters left, as reported by `Chars`.
    #[inline]
    fn size_hint(&self) -> (usize, Option<usize>) {
        self.inner.size_hint()
    }

    /// Number of characters left.
    #[inline]
    fn count(self) -> usize {
        // `inner` is consumed here, before the remaining fields of `self`
        // (including the backing storage) are dropped at the end of the call.
        self.inner.count()
    }

    /// The final character of the remaining input, if any.
    #[inline]
    fn last(self) -> Option<Self::Item> {
        self.inner.last()
    }
}

/// Back-to-front iteration, delegated to the wrapped `Chars`.
impl DoubleEndedIterator for OwnedChars {
    /// Yields the last remaining character, or `None` when nothing is left.
    fn next_back(&mut self) -> Option<Self::Item> {
        let wrapped = &mut self.inner;
        wrapped.next_back()
    }
}

impl Clone for OwnedChars {
    fn clone(&self) -> Self {
        // need a new allocation anyway, so simply go for String, and just
        // clone the remaining string
        OwnedChars::new(String::from(self.inner.as_str()))
    }
}

/// Debug output lists the full backing string (`storage`) followed by the
/// wrapped iterator (`inner`).
impl ::std::fmt::Debug for OwnedChars {
    fn fmt(&self, f: &mut ::std::fmt::Formatter) -> ::std::fmt::Result {
        // First `as_ref` goes from the box to the trait object, second one
        // from the trait object to the `str` it exposes.
        let backing: &str = self.storage.as_ref().as_ref();
        let mut out = f.debug_struct("OwnedChars");
        out.field("storage", &backing);
        out.field("inner", &self.inner);
        out.finish()
    }
}

// easy access
/// Extension trait providing `owned_chars` as a method on the receiver.
trait StringExt {
    /// Consumes `self` and returns an `OwnedChars` iterator.
    fn owned_chars(self) -> OwnedChars;
}
/// Blanket impl: any `'static` type that can be viewed as a `str` gets
/// `owned_chars`.
impl<S> StringExt for S
where
    S: AsRef<str> + 'static,
{
    /// Moves `self` into a newly constructed `OwnedChars`.
    fn owned_chars(self) -> OwnedChars {
        OwnedChars::new(self)
    }
}

See playground

Stefan
  • 4,914
  • 1
  • 21
  • 40
  • 2
    [The same thing](https://play.rust-lang.org/?gist=8a5cc326c0fa08cafe7733251ac53546&version=stable), but using [rental crate](https://crates.io/crates/rental). Unfortunately, it doesn't work in playground. – red75prime Nov 09 '17 at 10:15
  • Why is the extra box necessary? `S` can only be `String`, `Box<str>` or some other kind of owning `str` reference, right? So the storage must be heap allocated (if it's not `'static`) and therefore won't move until the `S` is dropped. (As long as `OwnedChars` doesn't `push` things on or otherwise trigger a move.) – trent Nov 09 '17 at 14:11
  • I could create a string storage type with small-string optimization (see the [`smallvec`](https://crates.io/crates/smallvec) crate). – Stefan Nov 09 '17 at 14:22
  • @Stefan Ah, true. But it seems like the normal use for this struct is when you have a `String` in hand and in that case it's double boxed. Do you think it would be safe to store a `Box<str>` instead and have `new<S: Into<Box<str>>>`? That would work for any reference as well as owned `String`s, only copies the contents when necessary, and doesn't double-box. – trent Nov 09 '17 at 14:51
  • I'm not sure about the allocation overhead of converting `String` to `Box<str>` - if it reuses the `Vec` memory this should be faster, yes. If you know you only want to do this for `String`s, you can of course just store the `String` (unboxed) instead - afaict `String` guarantees heap allocation. – Stefan Nov 09 '17 at 15:01
1

As copied from How can I store a Chars iterator in the same struct as the String it is iterating on?:

use std::mem;
use std::str::Chars;

/// Iterator over the characters of a `String` that it owns.
///
/// I believe this struct to be safe because the `String`'s buffer is
/// heap-allocated (stable address) and will never be modified
/// (stable address). `chars` will not outlive the struct, so
/// lying about the lifetime should be fine.
///
/// During destruction, fields are dropped in declaration order, so `chars`
/// is declared first: the borrowing iterator goes away before the string it
/// points into.
struct OwningChars {
    // Borrows from `_s`; the `'static` lifetime is a lie and must never be
    // handed out to callers.
    chars: Chars<'static>,
    // Keeps the buffer that `chars` points into alive. Never read or modified.
    _s: String,
}

impl OwningChars {
    /// Takes ownership of `s` and returns an iterator over its characters.
    fn new(s: String) -> Self {
        // SAFETY: only the lifetime changes, so source and target have the
        // same layout. The `str` being iterated lives in `s`'s heap buffer,
        // whose address does not change when the `String` value is moved into
        // the struct below. The buffer is never modified and is freed only
        // after `chars` has been dropped.
        let chars = unsafe { mem::transmute::<Chars<'_>, Chars<'static>>(s.chars()) };
        OwningChars { chars, _s: s }
    }
}

/// Iterates over the characters of the owned string by forwarding to the
/// wrapped `Chars`.
///
/// `size_hint`, `count` and `last` are forwarded as well: the default
/// `size_hint` would report `(0, None)`, and the defaults for the other two
/// would call `next` repeatedly instead of using what `Chars` provides.
impl Iterator for OwningChars {
    type Item = char;

    /// Returns the next character, or `None` once the string is exhausted.
    fn next(&mut self) -> Option<Self::Item> {
        self.chars.next()
    }

    /// Bounds on the number of characters left, as reported by `Chars`.
    fn size_hint(&self) -> (usize, Option<usize>) {
        self.chars.size_hint()
    }

    /// Number of characters left.
    fn count(self) -> usize {
        // `chars` is consumed here, before the remaining fields of `self`
        // (including the owned string) are dropped at the end of the call.
        self.chars.count()
    }

    /// The final character of the remaining input, if any.
    fn last(self) -> Option<Self::Item> {
        self.chars.last()
    }
}
Shepmaster
  • 326,504
  • 69
  • 892
  • 1,159