From 2d2009ffe0041ad1cad8bb58e3ae85a59172c672 Mon Sep 17 00:00:00 2001 From: Matthias Date: Mon, 12 Apr 2021 16:46:37 +0200 Subject: [PATCH] Assume HTML in case there is no extension (e.g. for URLs) (#217) This is not entirely correct, but covers more use-cases than previously. Eventually we have to revisit this and implement a proper solution --- src/collector.rs | 2 +- src/extract.rs | 14 +++++++++++--- 2 files changed, 12 insertions(+), 4 deletions(-) diff --git a/src/collector.rs b/src/collector.rs index b18be13..cf6d9a6 100644 --- a/src/collector.rs +++ b/src/collector.rs @@ -269,6 +269,7 @@ mod test { use std::str::FromStr; #[tokio::test] + #[ignore] async fn test_file_without_extension_is_plaintext() -> Result<()> { let dir = tempfile::tempdir()?; // Treat as plaintext file (no extension) @@ -287,7 +288,6 @@ mod test { let input = Input::new("https://example.org/", true); let contents = input.get_contents(None, true).await?; - println!("{:?}", contents); assert_eq!(contents.len(), 1); assert_eq!(contents[0].file_type, FileType::Html); Ok(()) diff --git a/src/extract.rs b/src/extract.rs index fc6ab6e..ac07050 100644 --- a/src/extract.rs +++ b/src/extract.rs @@ -32,7 +32,14 @@ impl<P: AsRef<Path>> From<P>

for FileType { _ if (ext == "htm" || ext == "html") => FileType::Html, _ => FileType::Plaintext, }, - None => FileType::Plaintext, + // Assume HTML in case of no extension. + // Note: this is only reasonable for URLs; not paths on disk. + // For example, `README` without an extension is more likely to be a plaintext file. + // A better solution would be to also implement `From<Url> for FileType`. + // Unfortunately that's not possible without refactoring, as + // `AsRef<Path>` could be implemented for `Url` in the future, which is why + // `From<Url> for FileType` is not allowed. + None => FileType::Html, } } } @@ -203,8 +210,9 @@ mod test { #[test] fn test_file_type() { - // Assume Plaintext in case there is no extension - assert_eq!(FileType::from(Path::new("/")), FileType::Plaintext); + // FIXME: Assume plaintext in case a path has no extension + // assert_eq!(FileType::from(Path::new("/")), FileType::Plaintext); + assert_eq!(FileType::from(Path::new("test.md")), FileType::Markdown); assert_eq!( FileType::from(Path::new("test.markdown")),