summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Lars Wirzenius <liw@liw.fi> 2023-04-10 07:35:31 +0300
committer Lars Wirzenius <liw@liw.fi> 2023-04-10 07:35:31 +0300
commit 3972a13ddcbc31430c19774f0e7da5adc4bbb668 (patch)
tree cf6fb0e7144108118ffe52c4b6a198c0f084b445
parent db1d5e4501d1a222a13a59c4e0bc448177b02f08 (diff)
download html-page-3972a13ddcbc31430c19774f0e7da5adc4bbb668.tar.gz
feat: retrieve text content of an element
Sponsored-by: author
-rw-r--r-- src/lib.rs 20
1 files changed, 20 insertions, 0 deletions
diff --git a/src/lib.rs b/src/lib.rs
index edee6b5..d187a57 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -490,6 +490,14 @@ impl Element {
pub fn serialize(&self) -> String {
format!("{}", self)
}
+
+ /// Return the concatenated text content of this element and its children.
+ /// Attributes are not included; only text passed to `visit_text` is collected.
+ pub fn plain_text(&self) -> String {
+ let mut text = TextVisitor::default();
+ text.visit(self); // the visitor appends each text piece it is handed
+ text.text
+ }
}
impl Display for Element {
@@ -603,6 +611,18 @@ pub trait Visitor {
}
}
+/// A [`Visitor`] that accumulates the text of an element and its children.
+#[derive(Debug, Default)]
+pub struct TextVisitor {
+ text: String, // text gathered so far; empty when built via `Default`
+}
+
+impl Visitor for TextVisitor {
+ fn visit_text(&mut self, s: &str) {
+ self.text.push_str(s); // appended verbatim; no separator is inserted between pieces
+ }
+}
+
#[cfg(test)]
mod test {
use super::{AttributeValue, Content, Element, Tag, Visitor};