Skip to content
Merged
Show file tree
Hide file tree
Changes from 12 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 14 additions & 0 deletions packages/onenote-converter/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -90,7 +90,21 @@ Suppose that the importer's Rust code is failing to parse a specific `example.on
2. Setting up Rust and Rust debugging. See [the relevant VSCode documentation](https://code.visualstudio.com/docs/languages/rust#_debugging) for details.
3. Clicking the "Debug" button for the test added in step 1. This button should be provided by extensions set up in step 2.

### Inspecting `.one` files

The `inspect` binary target of the `parser` crate allows inspecting `.one` file data.

For example, to inspect lower-level OneStore data:
```
bash$ cargo run -- ./test-data/ink.one --onestore
```

To inspect higher-level (parsed) section data:
```
bash$ cargo run -- ./test-data/ink.one --section
```
Comment thread
personalizedrefrigerator marked this conversation as resolved.
Comment thread
coderabbitai[bot] marked this conversation as resolved.
Outdated

**Note**: `inspect`'s output is unstable and should not be relied upon by scripts.

### Developing

Expand Down
15 changes: 15 additions & 0 deletions packages/onenote-converter/parser-utils/src/debug.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
/// Wrapper around a string slice whose `fmt::Debug` output is the slice itself,
/// written verbatim (no surrounding quotes, no escaping).
///
/// Handy when a hand-written `Debug` implementation needs to emit text that has
/// already been formatted.
pub struct DebugOutput<'a>(&'a str);

impl<'a> From<&'a str> for DebugOutput<'a> {
    /// Wraps `value`; the slice is borrowed, not copied.
    fn from(value: &'a str) -> Self {
        DebugOutput(value)
    }
}

impl std::fmt::Debug for DebugOutput<'_> {
    /// Writes the wrapped text to `f` unchanged.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let Self(text) = self;
        f.write_str(text)
    }
}
13 changes: 13 additions & 0 deletions packages/onenote-converter/parser-utils/src/errors.rs
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,12 @@ impl From<widestring::error::MissingNulTerminator> for Error {
}
}

impl From<widestring::error::Utf16Error> for Error {
fn from(err: widestring::error::Utf16Error) -> Self {
ErrorKind::from(err).into()
}
}

impl From<uuid::Error> for Error {
fn from(err: uuid::Error) -> Self {
ErrorKind::from(err).into()
Expand Down Expand Up @@ -128,6 +134,13 @@ pub enum ErrorKind {
err: string::FromUtf16Error,
},

/// A different type of malformed UTF-16 string was encountered during parsing.
#[error("Malformed UTF-16 string: {err}")]
Utf16LibError {
#[from]
err: widestring::error::Utf16Error,
},

/// A UTF-16 string without a null terminator was encountered during parsing.
#[error("UTF-16 string is missing null terminator: {err}")]
Utf16MissingNull {
Expand Down
3 changes: 2 additions & 1 deletion packages/onenote-converter/parser-utils/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@

use widestring::U16CString;

pub mod debug;
pub mod errors;
mod file_api;
pub mod log;
Expand All @@ -26,6 +27,6 @@ impl Utf16ToString for &[u8] {
.collect();

let value = U16CString::from_vec_truncate(data);
Ok(value.to_string().unwrap())
value.to_string().map_err(|err| err.into())
Comment on lines -29 to +30
Copy link
Copy Markdown
Collaborator Author

@personalizedrefrigerator personalizedrefrigerator Apr 13, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This change prevents the UTF-16 decoder from panicking when encountering invalid Unicode. It instead returns an error that can be handled by the caller. This allows the debug tool to safely include string representations of certain fields that might not include valid UTF-16.

}
}
3 changes: 3 additions & 0 deletions packages/onenote-converter/parser/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -33,3 +33,6 @@ features = [

[lib]
crate-type = ["cdylib", "lib"]

[[bin]]
name = "inspect"
121 changes: 121 additions & 0 deletions packages/onenote-converter/parser/src/bin/inspect.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,121 @@
use parser::Parser;
use parser_utils::errors::Error;
use std::{
env::{self, Args},
path::PathBuf,
process::exit,
};

pub fn main() {
let config = match Config::from_args(&mut env::args()) {
Ok(config) => config,
Err(error) => {
print_help_text(&error.program_name, error.reason);
exit(1)
}
};

let input_path_string = &config.input_file.to_string_lossy();
eprintln!("Reading {}", input_path_string);
let data = match std::fs::read(&config.input_file) {
Ok(data) => data,
Err(error) => {
let error = format!("File read error: {error}");
print_help_text(&config.program_name, &error);
exit(2)
}
};

let mut parser = Parser::new();
if config.output_mode == OutputMode::Section {
let parsed_section = match parser.parse_section_from_data(&data, input_path_string) {
Ok(section) => section,
Err(error) => handle_parse_error(&config, error),
};

println!("{:#?}", parsed_section);
} else {
let parsed_onestore = match parser.parse_onestore_raw(&data) {
Ok(section) => section,
Err(error) => handle_parse_error(&config, error),
};

println!("{:#?}", parsed_onestore);
}
}

fn handle_parse_error(config: &Config, error: Error) -> ! {
let error = format!("Parse error: {error}");
print_help_text(&config.program_name, &error);
exit(3)
}

/// Writes the usage summary to stderr, followed by `error` on its own line.
///
/// An empty `error` simply produces an empty trailing line.
fn print_help_text(program_name: &str, error: &str) {
    eprintln!("Usage: {program_name} <input_file> [--section|--onestore]");
    eprintln!("Description: Prints debug information about the given <input_file>");
    eprintln!("{error}");
}

/// Describes why the command-line arguments could not be turned into a [`Config`].
#[derive(Debug)]
struct ConfigParseError {
    /// Human-readable explanation of what was wrong with the arguments.
    reason: &'static str,
    /// Name the program was invoked with (`"??"` when even that is missing).
    program_name: String,
}

/// Selects which representation of the input file is printed.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum OutputMode {
    /// Lower-level output
    FileContent,
    /// Higher-level output, including the parsed objects
    Section,
}

/// Validated command-line configuration.
#[derive(Debug)]
struct Config {
    /// Path of the file to inspect.
    input_file: PathBuf,
    /// Requested output representation.
    output_mode: OutputMode,
    /// Name the program was invoked with; used in the usage text.
    program_name: String,
}

impl Config {
    /// Builds a [`Config`] from the process arguments.
    ///
    /// Expected layout: `<program> <input_file> [--section|--onestore]`.
    /// The mode defaults to `--onestore` when omitted.
    ///
    /// # Errors
    ///
    /// Returns a [`ConfigParseError`] when the program name or input file is
    /// missing, the mode flag is unknown, or extra arguments are present.
    pub fn from_args(args: &mut Args) -> Result<Self, ConfigParseError> {
        Self::from_arg_iter(args)
    }

    /// Parses the arguments from any iterator of owned strings, which keeps the
    /// parsing logic independent of the real process arguments.
    fn from_arg_iter(mut args: impl Iterator<Item = String>) -> Result<Self, ConfigParseError> {
        let Some(program_name) = args.next() else {
            return Err(ConfigParseError {
                reason: "Missing program name",
                program_name: "??".into(),
            });
        };
        let Some(input_file) = args.next() else {
            return Err(ConfigParseError {
                reason: "Not enough arguments",
                program_name,
            });
        };

        // A missing mode argument is treated exactly like `--onestore`.
        let output_mode = match args.next().as_deref() {
            None | Some("--onestore") => OutputMode::FileContent,
            Some("--section") => OutputMode::Section,
            Some(_) => {
                return Err(ConfigParseError {
                    reason: "Invalid output mode (expected --onestore or --section)",
                    program_name,
                });
            }
        };

        if args.next().is_some() {
            return Err(ConfigParseError {
                reason: "Too many arguments",
                program_name,
            });
        }

        Ok(Config {
            input_file: input_file.into(),
            output_mode,
            program_name,
        })
    }
}
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ use crate::{
},
shared::exguid::ExGuid,
};
use parser_utils::{errors::Result, log};
use parser_utils::errors::Result;
use std::fmt::Debug;
use std::rc::Rc;

Expand Down Expand Up @@ -73,8 +73,9 @@ impl ObjectGroupList {
if matches!(item, FileNodeData::ObjectGroupEndFND) {
break;
} else if let FileNodeData::DataSignatureGroupDefinitionFND(_) = item {
// Marks the end of a signature block. Ignored.
// See https://learn.microsoft.com/en-us/openspecs/office_file_formats/ms-onestore/0fa4c886-011a-4c19-9651-9a69e43a19c6
iterator.next();
log!("Ignoring DataSignatureGroupDefinitionFND");
Comment on lines +76 to -77
Copy link
Copy Markdown
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This change removes an unnecessary log statement that cluttered the tool's output.

} else if let Some(object) = Object::try_parse(iterator, &parse_context)? {
objects.push(Rc::new(object));
} else {
Expand Down
9 changes: 8 additions & 1 deletion packages/onenote-converter/parser/src/onenote/mod.rs
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
use std::rc::Rc;

use crate::onenote::notebook::Notebook;
use crate::onenote::section::{Section, SectionEntry, SectionGroup};
use crate::onestore::{OneStoreType, parse_onestore};
use crate::onestore::{OneStore, OneStoreType, parse_onestore};
use parser_utils::errors::{ErrorKind, Result};
use parser_utils::{fs_driver, log, reader::Reader};

Expand Down Expand Up @@ -73,6 +75,11 @@ impl Parser {
self.parse_section_from_data(&data, &path)
}

/// Parses `data` as low-level OneStore data.
///
/// Wraps `data` in a [`Reader`] and hands it to [`parse_onestore`], returning
/// its result unchanged. The parser instance itself is not consulted.
pub fn parse_onestore_raw(&mut self, data: &[u8]) -> Result<Rc<dyn OneStore>> {
    let mut reader = Reader::new(data);
    parse_onestore(&mut reader)
}

/// Parse a OneNote section file from a byte array.
/// The [path] is used to provide debugging information and determine
/// the name of the section file.
Expand Down
2 changes: 1 addition & 1 deletion packages/onenote-converter/parser/src/onestore/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ pub mod mapping_table;
pub mod object;
pub mod object_space;

pub trait OneStore {
pub trait OneStore: std::fmt::Debug {
fn get_type(&self) -> OneStoreType;
fn data_root(&self) -> ObjectSpaceRef;
/// Fetches the object space that is parent to the object identified by the
Expand Down
6 changes: 3 additions & 3 deletions packages/onenote-converter/parser/src/onestore/object.rs
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ impl ObjectFileData for FileBlob {

/// See [\[MS-ONESTORE\] 2.1.5](https://learn.microsoft.com/en-us/openspecs/office_file_formats/ms-onestore/ce60b62f-82e5-401a-bf2c-3255457732ad)
#[derive(Clone)]
pub(crate) struct Object {
pub struct Object {
pub(crate) context_id: ExGuid,

pub(crate) jc_id: JcId,
Expand Down Expand Up @@ -51,11 +51,11 @@ impl std::fmt::Debug for Object {
}

impl Object {
pub fn id(&self) -> JcId {
pub(crate) fn id(&self) -> JcId {
self.jc_id
}

pub fn props(&self) -> &ObjectPropSet {
pub(crate) fn props(&self) -> &ObjectPropSet {
&self.props
}

Expand Down
37 changes: 34 additions & 3 deletions packages/onenote-converter/parser/src/shared/prop_set.rs
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
use crate::one::property::PropertyType;
use crate::shared::property::{PropertyId, PropertyValue};
use parser_utils::Reader;
use parser_utils::Utf16ToString;
use parser_utils::debug::DebugOutput;
use parser_utils::errors::Result;
use std::collections::HashMap;
use std::fmt::Debug;
Expand All @@ -25,12 +27,41 @@ pub(crate) struct PropertySet {

impl Debug for PropertySet {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
fn format_value(value: &PropertyValue) -> String {
Copy link
Copy Markdown
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This change includes the string representation of some Vec()s in the debug output (Vecs sometimes store Strings). This is useful when searching for a particular string in the lower-level --onestore debug output.

match value {
PropertyValue::Vec(vec) => {
// Vec() property values are used to represent strings. Try creating a string representation for
// debugging purposes:
let s = vec
.as_slice()
// OneNote file strings are usually UTF-16
.utf16_to_string()
.unwrap_or("".to_string());

// Heuristic: If the text contains at least one ASCII letter/space character, it's probably a string.
// This will miss some non-ASCII strings and incorrectly print some non-string vecs.
let is_probably_string = !s.is_empty()
&& s.chars()
.any(|c| c.is_ascii_whitespace() || c.is_ascii_alphanumeric());
if is_probably_string {
format!("{:?} ({:?})", s, vec)
} else {
format!("{:?}", vec)
}
}
// Use the default compact representation of the value.
// This keeps potentially-long property values on a single line when producing
// multi-line debug output, which is usually more readable.
_ => format!("{:?}", value),
}
}

let mut debug_map = f.debug_map();
for (key, (_, value)) in &self.values {
let formatted_key = format!("{:#0x}", key);
// Use the default compact representation of the value
let formatted_value = format!("{:?}", value);
debug_map.entry(&formatted_key, &formatted_value);
let formatted_value = format_value(value);

debug_map.entry(&formatted_key, &DebugOutput::from(formatted_value.as_str()));
}
debug_map.finish()
}
Expand Down
1 change: 1 addition & 0 deletions packages/tools/cspell/dictionary4.txt
Original file line number Diff line number Diff line change
Expand Up @@ -261,3 +261,4 @@ llamacpp
bgcolor
bordercolor
togglefullscreen
onestore
Loading