-
-
Notifications
You must be signed in to change notification settings - Fork 6.1k
Chore: Importing from OneNote: Add debug tool for inspecting .one files
#15084
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 12 commits
eed21ae
960142e
009c3e6
7265c8e
3814c0c
dafdaea
2f15612
056514f
8c96f33
7237aac
6d05536
d753e62
e35a2ef
099fff3
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,15 @@ | ||
/// A wrapper around a string slice whose `fmt::Debug` output is the wrapped
/// text itself, written verbatim (no surrounding quotes, no escaping).
///
/// Handy when a hand-written `Debug` implementation has already rendered a
/// value to text and wants a formatter helper (such as `debug_map`) to emit
/// that text as-is.
pub struct DebugOutput<'a>(&'a str);

impl<'a> From<&'a str> for DebugOutput<'a> {
    /// Wraps `text` without copying it.
    fn from(text: &'a str) -> Self {
        DebugOutput(text)
    }
}

impl std::fmt::Debug for DebugOutput<'_> {
    /// Writes the wrapped text directly to the formatter.
    fn fmt(&self, formatter: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let DebugOutput(text) = self;
        formatter.write_str(text)
    }
}
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -2,6 +2,7 @@ | |
|
|
||
| use widestring::U16CString; | ||
|
|
||
| pub mod debug; | ||
| pub mod errors; | ||
| mod file_api; | ||
| pub mod log; | ||
|
|
@@ -26,6 +27,6 @@ impl Utf16ToString for &[u8] { | |
| .collect(); | ||
|
|
||
| let value = U16CString::from_vec_truncate(data); | ||
| Ok(value.to_string().unwrap()) | ||
| value.to_string().map_err(|err| err.into()) | ||
|
Comment on lines
-29
to
+30
Collaborator
Author
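There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. This change prevents the UTF-16 decoder from panicking on invalid input: the conversion error is now returned to the caller instead of being unwrapped.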
||
| } | ||
| } | ||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -33,3 +33,6 @@ features = [ | |
|
|
||
| [lib] | ||
| crate-type = ["cdylib", "lib"] | ||
|
|
||
| [[bin]] | ||
| name = "inspect" | ||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,121 @@ | ||
| use parser::Parser; | ||
| use parser_utils::errors::Error; | ||
| use std::{ | ||
| env::{self, Args}, | ||
| path::PathBuf, | ||
| process::exit, | ||
| }; | ||
|
|
||
| pub fn main() { | ||
| let config = match Config::from_args(&mut env::args()) { | ||
| Ok(config) => config, | ||
| Err(error) => { | ||
| print_help_text(&error.program_name, error.reason); | ||
| exit(1) | ||
| } | ||
| }; | ||
|
|
||
| let input_path_string = &config.input_file.to_string_lossy(); | ||
| eprintln!("Reading {}", input_path_string); | ||
| let data = match std::fs::read(&config.input_file) { | ||
| Ok(data) => data, | ||
| Err(error) => { | ||
| let error = format!("File read error: {error}"); | ||
| print_help_text(&config.program_name, &error); | ||
| exit(2) | ||
| } | ||
| }; | ||
|
|
||
| let mut parser = Parser::new(); | ||
| if config.output_mode == OutputMode::Section { | ||
| let parsed_section = match parser.parse_section_from_data(&data, input_path_string) { | ||
| Ok(section) => section, | ||
| Err(error) => handle_parse_error(&config, error), | ||
| }; | ||
|
|
||
| println!("{:#?}", parsed_section); | ||
| } else { | ||
| let parsed_onestore = match parser.parse_onestore_raw(&data) { | ||
| Ok(section) => section, | ||
| Err(error) => handle_parse_error(&config, error), | ||
| }; | ||
|
|
||
| println!("{:#?}", parsed_onestore); | ||
| } | ||
| } | ||
|
|
||
| fn handle_parse_error(config: &Config, error: Error) -> ! { | ||
| let error = format!("Parse error: {error}"); | ||
| print_help_text(&config.program_name, &error); | ||
| exit(3) | ||
| } | ||
|
|
||
/// Writes the usage banner to stderr, followed by a line containing `error`.
///
/// `program_name` is substituted into the usage line. When `error` is empty
/// the final line is simply blank.
fn print_help_text(program_name: &str, error: &str) {
    eprintln!(
        "Usage: {program_name} <input_file> [--section|--onestore]\n\
         Description: Prints debug information about the given <input_file>\n\
         {error}"
    );
}
|
|
||
/// Describes why the command-line arguments could not be turned into a [`Config`].
#[derive(Debug)]
struct ConfigParseError {
    /// Human-readable explanation of what was wrong with the arguments.
    reason: &'static str,
    /// Name the program was invoked with, or `"??"` when even that was missing.
    program_name: String,
}

/// Selects how much processing is applied to the input before it is printed.
#[derive(Debug, PartialEq)]
enum OutputMode {
    /// Lower-level output
    FileContent,
    /// Higher-level output, including the parsed objects
    Section,
}

/// Settings for a single run of the tool, derived from the command line.
#[derive(Debug)]
struct Config {
    /// Path of the file to inspect.
    input_file: PathBuf,
    /// Which representation of the file to print.
    output_mode: OutputMode,
    /// Name the program was invoked with (used in the usage text).
    program_name: String,
}

impl Config {
    /// Builds a [`Config`] from the process arguments.
    ///
    /// Expected layout: `<program_name> <input_file> [--section|--onestore]`.
    /// The mode flag is optional and defaults to `--onestore`.
    ///
    /// # Errors
    ///
    /// Returns a [`ConfigParseError`] when the program name or input file is
    /// missing, when the mode flag is not recognised, or when extra arguments
    /// follow the mode flag.
    pub fn from_args(args: &mut Args) -> Result<Self, ConfigParseError> {
        Self::from_arg_iter(args)
    }

    /// Argument parsing proper, generic over the argument source so that it
    /// does not depend on the real process arguments.
    fn from_arg_iter(mut args: impl Iterator<Item = String>) -> Result<Self, ConfigParseError> {
        let Some(program_name) = args.next() else {
            return Err(ConfigParseError {
                reason: "Missing program name",
                program_name: "??".into(),
            });
        };
        let Some(input_file) = args.next() else {
            return Err(ConfigParseError {
                reason: "Not enough arguments",
                program_name,
            });
        };

        // A missing flag selects the default (`--onestore`) mode.
        let output_mode = match args.next().as_deref() {
            None | Some("--onestore") => OutputMode::FileContent,
            Some("--section") => OutputMode::Section,
            Some(_) => {
                return Err(ConfigParseError {
                    reason: "Invalid output mode (expected --onestore or --section)",
                    program_name,
                });
            }
        };

        if args.next().is_some() {
            return Err(ConfigParseError {
                reason: "Too many arguments",
                program_name,
            });
        }

        Ok(Config {
            input_file: PathBuf::from(input_file),
            output_mode,
            program_name,
        })
    }
}
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -6,7 +6,7 @@ use crate::{ | |
| }, | ||
| shared::exguid::ExGuid, | ||
| }; | ||
| use parser_utils::{errors::Result, log}; | ||
| use parser_utils::errors::Result; | ||
| use std::fmt::Debug; | ||
| use std::rc::Rc; | ||
|
|
||
|
|
@@ -73,8 +73,9 @@ impl ObjectGroupList { | |
| if matches!(item, FileNodeData::ObjectGroupEndFND) { | ||
| break; | ||
| } else if let FileNodeData::DataSignatureGroupDefinitionFND(_) = item { | ||
| // Marks the end of a signature block. Ignored. | ||
| // See https://learn.microsoft.com/en-us/openspecs/office_file_formats/ms-onestore/0fa4c886-011a-4c19-9651-9a69e43a19c6 | ||
| iterator.next(); | ||
| log!("Ignoring DataSignatureGroupDefinitionFND"); | ||
|
Comment on lines
+76
to
-77
Collaborator
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. This change removes an unnecessary `log!` call and adds a comment explaining why `DataSignatureGroupDefinitionFND` is ignored.
||
| } else if let Some(object) = Object::try_parse(iterator, &parse_context)? { | ||
| objects.push(Rc::new(object)); | ||
| } else { | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -1,6 +1,8 @@ | ||
| use crate::one::property::PropertyType; | ||
| use crate::shared::property::{PropertyId, PropertyValue}; | ||
| use parser_utils::Reader; | ||
| use parser_utils::Utf16ToString; | ||
| use parser_utils::debug::DebugOutput; | ||
| use parser_utils::errors::Result; | ||
| use std::collections::HashMap; | ||
| use std::fmt::Debug; | ||
|
|
@@ -25,12 +27,41 @@ pub(crate) struct PropertySet { | |
|
|
||
| impl Debug for PropertySet { | ||
| fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { | ||
| fn format_value(value: &PropertyValue) -> String { | ||
|
Collaborator
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. This change includes the string representation of some `PropertyValue::Vec` values in the debug output.
||
| match value { | ||
| PropertyValue::Vec(vec) => { | ||
| // Vec() property values are used to represent strings. Try creating a string representation for | ||
| // debugging purposes: | ||
| let s = vec | ||
| .as_slice() | ||
| // OneNote file strings are usually UTF-16 | ||
| .utf16_to_string() | ||
| .unwrap_or("".to_string()); | ||
|
|
||
| // Heuristic: If the text contains at least one ASCII letter/space character, it's probably a string. | ||
| // This will miss some non-ASCII strings and incorrectly print some non-string vecs. | ||
| let is_probably_string = !s.is_empty() | ||
| && s.chars() | ||
| .any(|c| c.is_ascii_whitespace() || c.is_ascii_alphanumeric()); | ||
| if is_probably_string { | ||
| format!("{:?} ({:?})", s, vec) | ||
| } else { | ||
| format!("{:?}", vec) | ||
| } | ||
| } | ||
| // Use the default compact representation of the value. | ||
| // This keeps potentially-long property values on a single line when producing | ||
| // multi-line debug output, which is usually more readable. | ||
| _ => format!("{:?}", value), | ||
| } | ||
| } | ||
|
|
||
| let mut debug_map = f.debug_map(); | ||
| for (key, (_, value)) in &self.values { | ||
| let formatted_key = format!("{:#0x}", key); | ||
| // Use the default compact representation of the value | ||
| let formatted_value = format!("{:?}", value); | ||
| debug_map.entry(&formatted_key, &formatted_value); | ||
| let formatted_value = format_value(value); | ||
|
|
||
| debug_map.entry(&formatted_key, &DebugOutput::from(formatted_value.as_str())); | ||
| } | ||
| debug_map.finish() | ||
| } | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -261,3 +261,4 @@ llamacpp | |
| bgcolor | ||
| bordercolor | ||
| togglefullscreen | ||
| onestore | ||
Uh oh!
There was an error while loading. Please reload this page.