feat: spreadsheet artifact (#13345)

This commit is contained in:
jif-oai
2026-03-03 12:25:40 +00:00
committed by GitHub
parent 564a883c2a
commit 8c5e50ef39
12 changed files with 5213 additions and 0 deletions

15
codex-rs/Cargo.lock generated
View File

@@ -1558,6 +1558,21 @@ dependencies = [
"zip 2.4.2",
]
[[package]]
name = "codex-artifact-spreadsheet"
version = "0.0.0"
dependencies = [
"base64 0.22.1",
"pretty_assertions",
"regex",
"serde",
"serde_json",
"tempfile",
"thiserror 2.0.18",
"uuid",
"zip 2.4.2",
]
[[package]]
name = "codex-async-utils"
version = "0.0.0"

View File

@@ -34,6 +34,7 @@ members = [
"network-proxy",
"ollama",
"artifact-presentation",
"artifact-spreadsheet",
"process-hardening",
"protocol",
"rmcp-client",
@@ -111,6 +112,7 @@ codex-network-proxy = { path = "network-proxy" }
codex-ollama = { path = "ollama" }
codex-otel = { path = "otel" }
codex-artifact-presentation = { path = "artifact-presentation" }
codex-artifact-spreadsheet = { path = "artifact-spreadsheet" }
codex-process-hardening = { path = "process-hardening" }
codex-protocol = { path = "protocol" }
codex-responses-api-proxy = { path = "responses-api-proxy" }
@@ -352,6 +354,7 @@ ignored = [
"openssl-sys",
"codex-utils-readiness",
"codex-secrets",
"codex-artifact-spreadsheet"
]
[profile.release]

View File

@@ -0,0 +1,6 @@
load("//:defs.bzl", "codex_rust_crate")
codex_rust_crate(
name = "artifact-spreadsheet",
crate_name = "codex_artifact_spreadsheet",
)

View File

@@ -0,0 +1,25 @@
[package]
name = "codex-artifact-spreadsheet"
version.workspace = true
edition.workspace = true
license.workspace = true
[lib]
name = "codex_artifact_spreadsheet"
path = "src/lib.rs"
[lints]
workspace = true
[dependencies]
base64 = { workspace = true }
regex = { workspace = true }
serde = { workspace = true, features = ["derive"] }
serde_json = { workspace = true }
thiserror = { workspace = true }
uuid = { workspace = true, features = ["v4"] }
zip = { workspace = true }
[dev-dependencies]
pretty_assertions = { workspace = true }
tempfile = { workspace = true }

View File

@@ -0,0 +1,245 @@
use serde::Deserialize;
use serde::Serialize;
use crate::SpreadsheetArtifactError;
/// Position of a single cell, stored as numeric column and row indices.
///
/// The derived ordering compares `column` first and `row` second, following
/// the field declaration order.
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash, Serialize, Deserialize)]
pub struct CellAddress {
/// Column index; `column_letters_to_index` maps `A` to 1, `Z` to 26, `AA` to 27.
pub column: u32,
/// Row number; `CellAddress::parse` rejects 0.
pub row: u32,
}
impl CellAddress {
    /// Parses an A1-style reference (ASCII letters followed by ASCII digits).
    ///
    /// Surrounding whitespace is ignored and letters are case-insensitive.
    ///
    /// # Errors
    ///
    /// Returns [`SpreadsheetArtifactError::InvalidAddress`] when the input is
    /// empty, is not letters followed by digits, has a column that does not
    /// fit in `u32`, or has a row that is zero or does not fit in `u32`.
    pub fn parse(address: &str) -> Result<Self, SpreadsheetArtifactError> {
        // Every failure in this function reports the caller's original text.
        let invalid = |message: &str| SpreadsheetArtifactError::InvalidAddress {
            address: address.to_string(),
            message: message.to_string(),
        };

        let trimmed = address.trim();
        if trimmed.is_empty() {
            return Err(invalid("address is empty"));
        }

        // The column part is the leading run of ASCII letters.
        let split = trimmed
            .find(|ch: char| !ch.is_ascii_alphabetic())
            .unwrap_or(trimmed.len());
        let (letters, digits) = trimmed.split_at(split);
        if letters.is_empty() || digits.is_empty() {
            return Err(invalid("expected A1-style address"));
        }

        let well_formed = letters.chars().all(|ch| ch.is_ascii_alphabetic())
            && digits.chars().all(|ch| ch.is_ascii_digit());
        if !well_formed {
            return Err(invalid("expected letters followed by digits"));
        }

        let column = column_letters_to_index(letters)?;
        match digits.parse::<u32>() {
            Ok(0) => Err(invalid("row must be positive")),
            Ok(row) => Ok(Self { column, row }),
            Err(_) => Err(invalid("row must be a positive integer")),
        }
    }

    /// Formats the address in A1 notation, e.g. column 2 / row 3 becomes `B3`.
    pub fn to_a1(self) -> String {
        let letters = column_index_to_letters(self.column);
        format!("{letters}{}", self.row)
    }
}
/// Rectangular block of cells described by two corner addresses.
///
/// `CellRange::parse` and `CellRange::from_start_end` normalise the corners
/// so that `start` holds the smaller column/row and `end` the larger ones.
/// The fields are public, so values built directly are not guaranteed to be
/// normalised.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
pub struct CellRange {
/// Corner with the minimum column and row (after normalisation).
pub start: CellAddress,
/// Corner with the maximum column and row (after normalisation).
pub end: CellAddress,
}
impl CellRange {
    /// Parses `A1` or `A1:B2` style text into a normalised range.
    ///
    /// A single cell yields a range whose `start` and `end` are equal.
    /// Surrounding whitespace is ignored.
    ///
    /// # Errors
    ///
    /// Returns [`SpreadsheetArtifactError::InvalidAddress`] when the text is
    /// empty or either corner is not a valid cell address.
    pub fn parse(address: &str) -> Result<Self, SpreadsheetArtifactError> {
        let trimmed = address.trim();
        if trimmed.is_empty() {
            return Err(SpreadsheetArtifactError::InvalidAddress {
                address: address.to_string(),
                message: "range is empty".to_string(),
            });
        }
        let (start, end) = match trimmed.split_once(':') {
            Some((left, right)) => (CellAddress::parse(left)?, CellAddress::parse(right)?),
            None => {
                let cell = CellAddress::parse(trimmed)?;
                (cell, cell)
            }
        };
        // Share the normalisation logic instead of duplicating it here.
        Ok(Self::from_start_end(start, end))
    }

    /// Builds a range from any two opposite corners, normalising them so that
    /// `start` is the top-left cell and `end` the bottom-right cell.
    pub fn from_start_end(start: CellAddress, end: CellAddress) -> Self {
        Self {
            start: CellAddress {
                column: start.column.min(end.column),
                row: start.row.min(end.row),
            },
            end: CellAddress {
                column: start.column.max(end.column),
                row: start.row.max(end.row),
            },
        }
    }

    /// Formats the range in A1 notation; a single-cell range is written as
    /// just that cell (`A1` rather than `A1:A1`).
    pub fn to_a1(&self) -> String {
        if self.is_single_cell() {
            self.start.to_a1()
        } else {
            format!("{}:{}", self.start.to_a1(), self.end.to_a1())
        }
    }

    /// Returns `true` when both corners are the same cell.
    pub fn is_single_cell(&self) -> bool {
        self.start == self.end
    }

    /// Returns `true` when both corners share a row.
    pub fn is_single_row(&self) -> bool {
        self.start.row == self.end.row
    }

    /// Returns `true` when both corners share a column.
    pub fn is_single_column(&self) -> bool {
        self.start.column == self.end.column
    }

    /// Number of columns covered by the range (inclusive of both corners).
    pub fn width(&self) -> usize {
        Self::inclusive_span(self.start.column, self.end.column)
    }

    /// Number of rows covered by the range (inclusive of both corners).
    pub fn height(&self) -> usize {
        Self::inclusive_span(self.start.row, self.end.row)
    }

    /// Returns `true` when `address` lies inside the range, corners included.
    pub fn contains(&self, address: CellAddress) -> bool {
        self.start.column <= address.column
            && address.column <= self.end.column
            && self.start.row <= address.row
            && address.row <= self.end.row
    }

    /// Returns `true` when both corners of `other` lie inside this range.
    pub fn contains_range(&self, other: &CellRange) -> bool {
        self.contains(other.start) && self.contains(other.end)
    }

    /// Returns `true` when the two ranges share at least one cell.
    pub fn intersects(&self, other: &CellRange) -> bool {
        !(self.end.column < other.start.column
            || other.end.column < self.start.column
            || self.end.row < other.start.row
            || other.end.row < self.start.row)
    }

    /// Iterates every address in the range in row-major order (all columns of
    /// the first row, then the next row, and so on).
    pub fn addresses(&self) -> impl Iterator<Item = CellAddress> {
        // `CellAddress` is `Copy`, so the corners can be captured by value
        // without cloning the whole range for every row.
        let (start, end) = (self.start, self.end);
        (start.row..=end.row).flat_map(move |row| {
            (start.column..=end.column).map(move |column| CellAddress { column, row })
        })
    }

    /// Inclusive distance between two indices, computed without `u32`
    /// underflow/overflow.
    ///
    /// The fields are public and deserialisable, so a range is not guaranteed
    /// to be normalised; `abs_diff` keeps the result well defined either way,
    /// and the `+ 1` happens after widening so a full-width span cannot wrap.
    fn inclusive_span(first: u32, last: u32) -> usize {
        (first.abs_diff(last) as usize).saturating_add(1)
    }
}
/// Converts spreadsheet column letters to a 1-based index (`A` → 1, `Z` → 26,
/// `AA` → 27). Letters are case-insensitive and surrounding whitespace is
/// ignored.
///
/// # Errors
///
/// Returns [`SpreadsheetArtifactError::InvalidAddress`] when the input is
/// empty, contains a non-letter, or the index does not fit in `u32`.
pub fn column_letters_to_index(column: &str) -> Result<u32, SpreadsheetArtifactError> {
    let invalid = |message: &str| SpreadsheetArtifactError::InvalidAddress {
        address: column.to_string(),
        message: message.to_string(),
    };

    let trimmed = column.trim();
    if trimmed.is_empty() {
        return Err(invalid("column is empty"));
    }

    // Bijective base-26: each letter contributes a digit in 1..=26.
    trimmed.chars().try_fold(0u32, |index, ch| {
        if !ch.is_ascii_alphabetic() {
            return Err(invalid("column must contain only letters"));
        }
        let digit = u32::from(ch.to_ascii_uppercase() as u8 - b'A') + 1;
        index
            .checked_mul(26)
            .and_then(|scaled| scaled.checked_add(digit))
            .ok_or_else(|| invalid("column is too large"))
    })
}
/// Converts a 1-based column index to spreadsheet letters (1 → `A`,
/// 26 → `Z`, 27 → `AA`). Index 0 has no letter form and yields an empty
/// string.
pub fn column_index_to_letters(mut index: u32) -> String {
    // Digits come out least-significant first, so collect them reversed and
    // flip at the end.
    let mut reversed = String::new();
    while index > 0 {
        let zero_based = index - 1;
        let digit = (zero_based % 26) as u8;
        reversed.push(char::from(b'A' + digit));
        index = zero_based / 26;
    }
    reversed.chars().rev().collect()
}
/// Parses a column reference such as `B` or `B:D` into an inclusive
/// `(first, last)` pair of 1-based column indices, ordered so that
/// `first <= last`. A single column yields the same index twice.
///
/// # Errors
///
/// Propagates the error from [`column_letters_to_index`] for either side.
pub fn parse_column_reference(reference: &str) -> Result<(u32, u32), SpreadsheetArtifactError> {
    let trimmed = reference.trim();
    let (first, second) = match trimmed.split_once(':') {
        Some((left, right)) => (
            column_letters_to_index(left)?,
            column_letters_to_index(right)?,
        ),
        None => {
            let column = column_letters_to_index(trimmed)?;
            (column, column)
        }
    };
    Ok((first.min(second), first.max(second)))
}
/// Returns `true` when `address` is accepted by `CellAddress::parse`.
pub fn is_valid_cell_reference(address: &str) -> bool {
CellAddress::parse(address).is_ok()
}
/// Returns `true` when `address` is accepted by `CellRange::parse`
/// (a single cell such as `A1` also counts as a range).
pub fn is_valid_range_reference(address: &str) -> bool {
CellRange::parse(address).is_ok()
}
/// Returns `true` when `address` parses as a range whose corners share a row
/// (for example `A1` or `A1:C1`).
pub fn is_valid_row_reference(address: &str) -> bool {
    CellRange::parse(address).is_ok_and(|range| range.is_single_row())
}
/// Returns `true` when `address` is accepted by `parse_column_reference`
/// (letters only, optionally two groups separated by `:`).
pub fn is_valid_column_reference(address: &str) -> bool {
parse_column_reference(address).is_ok()
}

View File

@@ -0,0 +1,39 @@
use std::path::PathBuf;
use thiserror::Error;
/// Errors reported by the spreadsheet artifact crate.
///
/// The `Display` text of every variant comes from its `#[error(...)]`
/// attribute; `action` fields carry the name of the action being executed.
#[derive(Debug, Error)]
pub enum SpreadsheetArtifactError {
/// An action was requested without the `artifact_id` it needs.
#[error("missing `artifact_id` for action `{action}`")]
MissingArtifactId { action: String },
/// The supplied `artifact_id` is not known.
#[error("unknown artifact id `{artifact_id}` for action `{action}`")]
UnknownArtifactId { action: String, artifact_id: String },
/// The action name is not recognised.
#[error("unknown action `{0}`")]
UnknownAction(String),
/// The arguments for an action were rejected; `message` explains why.
#[error("invalid args for action `{action}`: {message}")]
InvalidArgs { action: String, message: String },
/// A cell, range or column reference could not be parsed.
#[error("invalid address `{address}`: {message}")]
InvalidAddress { address: String, message: String },
/// A sheet could not be resolved for the action.
#[error("sheet lookup failed for action `{action}`: {message}")]
SheetLookup { action: String, message: String },
/// `index` was outside a collection of length `len`.
#[error("index `{index}` is out of range for action `{action}`; len={len}")]
IndexOutOfRange {
action: String,
index: usize,
len: usize,
},
/// A merge on `range` conflicts with `conflict`.
#[error("merge conflict for action `{action}` on range `{range}` with `{conflict}`")]
MergeConflict {
action: String,
range: String,
conflict: String,
},
/// Tokenising, parsing or evaluating a formula failed. `location` holds
/// whatever context the failing step had (formula text, token, or value).
#[error("formula error at `{location}`: {message}")]
Formula { location: String, message: String },
/// Serialising or deserialising failed.
#[error("serialization failed: {message}")]
Serialization { message: String },
/// Reading an XLSX file from `path` failed.
#[error("failed to import XLSX `{path}`: {message}")]
ImportFailed { path: PathBuf, message: String },
/// Writing an XLSX file to `path` failed.
#[error("failed to export XLSX `{path}`: {message}")]
ExportFailed { path: PathBuf, message: String },
}

View File

@@ -0,0 +1,535 @@
use std::collections::BTreeSet;
use crate::CellAddress;
use crate::CellRange;
use crate::SpreadsheetArtifact;
use crate::SpreadsheetArtifactError;
use crate::SpreadsheetCellValue;
/// Lexical token produced by `tokenize` from formula text.
#[derive(Debug, Clone)]
enum Token {
/// Numeric literal made of digits and `.`.
Number(f64),
/// Word containing both ASCII letters and digits (e.g. `A1`); kept as raw
/// text and validated later by `CellAddress::parse`.
Cell(String),
/// Any other word (letters, digits, `_`); treated as a function name.
Ident(String),
/// `+`
Plus,
/// `-`
Minus,
/// `*`
Star,
/// `/`
Slash,
/// `(`
LParen,
/// `)`
RParen,
/// `:` (range separator)
Colon,
/// `,` (argument separator)
Comma,
}
/// Parsed formula expression tree built by `Parser`.
#[derive(Debug, Clone)]
enum Expr {
/// Numeric literal.
Number(f64),
/// Reference to a single cell on the current sheet.
Cell(CellAddress),
/// Reference to a rectangular range on the current sheet.
Range(CellRange),
/// Arithmetic negation of the inner expression.
UnaryMinus(Box<Expr>),
/// Binary arithmetic operation.
Binary {
op: BinaryOp,
left: Box<Expr>,
right: Box<Expr>,
},
/// Function call; `name` is kept as written and upper-cased at evaluation.
Function {
name: String,
args: Vec<Expr>,
},
}
/// Arithmetic operators supported in formulas.
#[derive(Debug, Clone, Copy)]
enum BinaryOp {
/// `+`
Add,
/// `-`
Subtract,
/// `*`
Multiply,
/// `/`; division by zero evaluates to `#DIV/0!` in `apply_binary_op`.
Divide,
}
/// Result of evaluating an `Expr`.
#[derive(Debug, Clone)]
enum EvalValue {
/// Single value; `None` represents an empty cell.
Scalar(Option<SpreadsheetCellValue>),
/// Values of every cell in a range, in the order produced by
/// `CellRange::addresses`; `None` entries are empty cells.
Range(Vec<Option<SpreadsheetCellValue>>),
}
pub(crate) fn recalculate_workbook(artifact: &mut SpreadsheetArtifact) {
let updates = artifact
.sheets
.iter()
.enumerate()
.flat_map(|(sheet_index, sheet)| {
sheet.cells.iter().filter_map(move |(address, cell)| {
cell.formula
.as_ref()
.map(|formula| (sheet_index, *address, formula.clone()))
})
})
.map(|(sheet_index, address, formula)| {
let mut stack = BTreeSet::new();
let value = evaluate_formula(artifact, sheet_index, &formula, &mut stack)
.unwrap_or_else(|error| {
Some(SpreadsheetCellValue::Error(map_error_to_code(&error)))
});
(sheet_index, address, value)
})
.collect::<Vec<_>>();
for (sheet_index, address, value) in updates {
if let Some(sheet) = artifact.sheets.get_mut(sheet_index)
&& let Some(cell) = sheet.cells.get_mut(&address)
{
cell.value = value;
}
}
}
/// Tokenises, parses and evaluates one formula in the context of
/// `sheet_index`.
///
/// Surrounding whitespace and leading `=` characters are stripped before
/// tokenising. `stack` holds the cells currently being evaluated and is used
/// for cycle detection by `evaluate_cell_reference`.
///
/// # Errors
///
/// Returns [`SpreadsheetArtifactError::Formula`] when tokens remain after the
/// expression or when the formula evaluates to a bare range, and propagates
/// any tokenising, parsing or evaluation error.
fn evaluate_formula(
    artifact: &SpreadsheetArtifact,
    sheet_index: usize,
    formula: &str,
    stack: &mut BTreeSet<(usize, CellAddress)>,
) -> Result<Option<SpreadsheetCellValue>, SpreadsheetArtifactError> {
    let formula_error = |message: &str| SpreadsheetArtifactError::Formula {
        location: formula.to_string(),
        message: message.to_string(),
    };

    let source = formula.trim().trim_start_matches('=');
    let mut parser = Parser::new(tokenize(source)?);
    let expr = parser.parse_expression()?;
    if parser.has_remaining() {
        return Err(formula_error("unexpected trailing tokens"));
    }

    let EvalValue::Scalar(value) = evaluate_expr(artifact, sheet_index, &expr, stack)? else {
        return Err(formula_error(
            "range expressions are only allowed inside functions",
        ));
    };
    Ok(value)
}
fn evaluate_expr(
artifact: &SpreadsheetArtifact,
sheet_index: usize,
expr: &Expr,
stack: &mut BTreeSet<(usize, CellAddress)>,
) -> Result<EvalValue, SpreadsheetArtifactError> {
match expr {
Expr::Number(value) => Ok(EvalValue::Scalar(Some(number_to_value(*value)))),
Expr::Cell(address) => evaluate_cell_reference(artifact, sheet_index, *address, stack),
Expr::Range(range) => {
let sheet = artifact.sheets.get(sheet_index).ok_or_else(|| {
SpreadsheetArtifactError::Formula {
location: range.to_a1(),
message: "sheet index was not found".to_string(),
}
})?;
let values = range
.addresses()
.map(|address| sheet.get_cell(address).and_then(|cell| cell.value.clone()))
.collect::<Vec<_>>();
Ok(EvalValue::Range(values))
}
Expr::UnaryMinus(inner) => {
let value = evaluate_scalar(artifact, sheet_index, inner, stack)?;
Ok(EvalValue::Scalar(match value {
None => Some(SpreadsheetCellValue::Integer(0)),
Some(SpreadsheetCellValue::Integer(value)) => {
Some(SpreadsheetCellValue::Integer(-value))
}
Some(SpreadsheetCellValue::Float(value)) => {
Some(SpreadsheetCellValue::Float(-value))
}
Some(SpreadsheetCellValue::Error(value)) => {
Some(SpreadsheetCellValue::Error(value))
}
Some(_) => Some(SpreadsheetCellValue::Error("#VALUE!".to_string())),
}))
}
Expr::Binary { op, left, right } => {
let left = evaluate_scalar(artifact, sheet_index, left, stack)?;
let right = evaluate_scalar(artifact, sheet_index, right, stack)?;
Ok(EvalValue::Scalar(Some(apply_binary_op(*op, left, right)?)))
}
Expr::Function { name, args } => {
let mut numeric = Vec::new();
for arg in args {
match evaluate_expr(artifact, sheet_index, arg, stack)? {
EvalValue::Scalar(value) => {
if let Some(number) = scalar_to_number(value.clone())? {
numeric.push(number);
}
}
EvalValue::Range(values) => {
for value in values {
if let Some(number) = scalar_to_number(value.clone())? {
numeric.push(number);
}
}
}
}
}
let upper = name.to_ascii_uppercase();
let result = match upper.as_str() {
"SUM" => numeric.iter().sum::<f64>(),
"AVERAGE" => {
if numeric.is_empty() {
return Ok(EvalValue::Scalar(None));
}
numeric.iter().sum::<f64>() / numeric.len() as f64
}
"MIN" => numeric.iter().copied().reduce(f64::min).unwrap_or(0.0),
"MAX" => numeric.iter().copied().reduce(f64::max).unwrap_or(0.0),
_ => {
return Ok(EvalValue::Scalar(Some(SpreadsheetCellValue::Error(
"#NAME?".to_string(),
))));
}
};
Ok(EvalValue::Scalar(Some(number_to_value(result))))
}
}
}
/// Evaluates `expr` and requires the result to be a single value.
///
/// # Errors
///
/// Returns [`SpreadsheetArtifactError::Formula`] when the expression
/// evaluates to a range, and propagates any evaluation error.
fn evaluate_scalar(
    artifact: &SpreadsheetArtifact,
    sheet_index: usize,
    expr: &Expr,
    stack: &mut BTreeSet<(usize, CellAddress)>,
) -> Result<Option<SpreadsheetCellValue>, SpreadsheetArtifactError> {
    let evaluated = evaluate_expr(artifact, sheet_index, expr, stack)?;
    let EvalValue::Scalar(value) = evaluated else {
        return Err(SpreadsheetArtifactError::Formula {
            location: format!("{expr:?}"),
            message: "expected a scalar expression".to_string(),
        });
    };
    Ok(value)
}
/// Resolves a single-cell reference on the sheet at `sheet_index`.
///
/// A cell with a formula is evaluated recursively; a cell without one yields
/// its stored value; a missing cell yields `None`. While a cell is being
/// evaluated its `(sheet_index, address)` key is kept in `stack`; meeting a
/// key that is already present means the reference is circular and the
/// result is the `#CYCLE!` error value.
///
/// # Errors
///
/// Returns [`SpreadsheetArtifactError::Formula`] when `sheet_index` does not
/// exist, and propagates errors from evaluating the referenced formula.
fn evaluate_cell_reference(
    artifact: &SpreadsheetArtifact,
    sheet_index: usize,
    address: CellAddress,
    stack: &mut BTreeSet<(usize, CellAddress)>,
) -> Result<EvalValue, SpreadsheetArtifactError> {
    let Some(sheet) = artifact.sheets.get(sheet_index) else {
        return Err(SpreadsheetArtifactError::Formula {
            location: address.to_a1(),
            message: "sheet index was not found".to_string(),
        });
    };
    let key = (sheet_index, address);
    if !stack.insert(key) {
        return Ok(EvalValue::Scalar(Some(SpreadsheetCellValue::Error(
            "#CYCLE!".to_string(),
        ))));
    }
    let outcome = match sheet.get_cell(address) {
        Some(cell) => match &cell.formula {
            Some(formula) => evaluate_formula(artifact, sheet_index, formula, stack),
            None => Ok(cell.value.clone()),
        },
        None => Ok(None),
    };
    // Pop the key on the error path too, so a failed evaluation never leaves
    // a stale entry that would later be misreported as a cycle.
    stack.remove(&key);
    outcome.map(EvalValue::Scalar)
}
fn apply_binary_op(
op: BinaryOp,
left: Option<SpreadsheetCellValue>,
right: Option<SpreadsheetCellValue>,
) -> Result<SpreadsheetCellValue, SpreadsheetArtifactError> {
if let Some(SpreadsheetCellValue::Error(value)) = &left {
return Ok(SpreadsheetCellValue::Error(value.clone()));
}
if let Some(SpreadsheetCellValue::Error(value)) = &right {
return Ok(SpreadsheetCellValue::Error(value.clone()));
}
let left = scalar_to_number(left)?;
let right = scalar_to_number(right)?;
let left = left.unwrap_or(0.0);
let right = right.unwrap_or(0.0);
let result = match op {
BinaryOp::Add => left + right,
BinaryOp::Subtract => left - right,
BinaryOp::Multiply => left * right,
BinaryOp::Divide => {
if right == 0.0 {
return Ok(SpreadsheetCellValue::Error("#DIV/0!".to_string()));
}
left / right
}
};
Ok(number_to_value(result))
}
fn scalar_to_number(
value: Option<SpreadsheetCellValue>,
) -> Result<Option<f64>, SpreadsheetArtifactError> {
match value {
None => Ok(None),
Some(SpreadsheetCellValue::Integer(value)) => Ok(Some(value as f64)),
Some(SpreadsheetCellValue::Float(value)) => Ok(Some(value)),
Some(SpreadsheetCellValue::Bool(value)) => Ok(Some(if value { 1.0 } else { 0.0 })),
Some(SpreadsheetCellValue::Error(value)) => Err(SpreadsheetArtifactError::Formula {
location: value,
message: "encountered error value".to_string(),
}),
Some(other) => Err(SpreadsheetArtifactError::Formula {
location: format!("{other:?}"),
message: "value is not numeric".to_string(),
}),
}
}
/// Wraps a computed number as a cell value: whole numbers that fit in `i64`
/// become `Integer`, everything else (fractions, out-of-range magnitudes,
/// infinities, NaN) stays `Float`.
fn number_to_value(number: f64) -> SpreadsheetCellValue {
    // 2^63: the first value above `i64::MAX`; `-2^63` is exactly `i64::MIN`.
    const I64_LIMIT: f64 = 9_223_372_036_854_775_808.0;

    // Large floats have no fractional part, so `fract() == 0.0` alone is not
    // enough: `as i64` would silently saturate values such as 1e19 to
    // `i64::MAX`. Only convert when the value is actually representable.
    let is_whole = number.fract() == 0.0;
    let fits_i64 = (-I64_LIMIT..I64_LIMIT).contains(&number);
    if is_whole && fits_i64 {
        SpreadsheetCellValue::Integer(number as i64)
    } else {
        SpreadsheetCellValue::Float(number)
    }
}
/// Maps an evaluation error to the error code stored in the cell.
///
/// Formula errors are classified by their message text: messages containing
/// `cycle` map to `#CYCLE!`, messages containing `not numeric` or `scalar`
/// map to `#VALUE!`. Address errors map to `#REF!`. Everything else maps to
/// `#ERROR!`.
fn map_error_to_code(error: &SpreadsheetArtifactError) -> String {
    let code = match error {
        SpreadsheetArtifactError::Formula { message, .. } if message.contains("cycle") => {
            "#CYCLE!"
        }
        SpreadsheetArtifactError::Formula { message, .. }
            if message.contains("not numeric") || message.contains("scalar") =>
        {
            "#VALUE!"
        }
        SpreadsheetArtifactError::InvalidAddress { .. } => "#REF!",
        _ => "#ERROR!",
    };
    code.to_string()
}
/// Splits formula text (without the leading `=`) into tokens.
///
/// ASCII whitespace is skipped. Runs of digits and `.` become numbers. Words
/// made of ASCII letters, digits and `_` (starting with a letter or `_`)
/// become `Cell` tokens when they contain both a letter and a digit, and
/// `Ident` tokens otherwise.
///
/// # Errors
///
/// Returns [`SpreadsheetArtifactError::Formula`] for a malformed numeric
/// literal or for any character that does not start a known token.
fn tokenize(source: &str) -> Result<Vec<Token>, SpreadsheetArtifactError> {
    let mut tokens = Vec::new();
    let mut cursor = source.char_indices().peekable();

    while let Some((start, ch)) = cursor.next() {
        if ch.is_ascii_whitespace() {
            continue;
        }

        // Single-character tokens.
        let punctuation = match ch {
            '+' => Some(Token::Plus),
            '-' => Some(Token::Minus),
            '*' => Some(Token::Star),
            '/' => Some(Token::Slash),
            '(' => Some(Token::LParen),
            ')' => Some(Token::RParen),
            ':' => Some(Token::Colon),
            ',' => Some(Token::Comma),
            _ => None,
        };
        if let Some(token) = punctuation {
            tokens.push(token);
            continue;
        }

        // Byte offset one past the end of the current multi-character token.
        let mut end = start + ch.len_utf8();

        if ch.is_ascii_digit() || ch == '.' {
            while let Some((position, next)) =
                cursor.next_if(|&(_, next)| next.is_ascii_digit() || next == '.')
            {
                end = position + next.len_utf8();
            }
            let number = source[start..end].parse::<f64>().map_err(|_| {
                SpreadsheetArtifactError::Formula {
                    location: source.to_string(),
                    message: "invalid numeric literal".to_string(),
                }
            })?;
            tokens.push(Token::Number(number));
        } else if ch.is_ascii_alphabetic() || ch == '_' {
            while let Some((position, next)) =
                cursor.next_if(|&(_, next)| next.is_ascii_alphanumeric() || next == '_')
            {
                end = position + next.len_utf8();
            }
            let text = &source[start..end];
            let has_digit = text.chars().any(|part| part.is_ascii_digit());
            let has_letter = text.chars().any(|part| part.is_ascii_alphabetic());
            // A word mixing letters and digits is treated as a cell address.
            tokens.push(if has_digit && has_letter {
                Token::Cell(text.to_string())
            } else {
                Token::Ident(text.to_string())
            });
        } else {
            return Err(SpreadsheetArtifactError::Formula {
                location: source.to_string(),
                message: format!("unsupported token `{ch}`"),
            });
        }
    }

    Ok(tokens)
}
/// Recursive-descent parser over a token list.
struct Parser {
/// Tokens of the formula being parsed.
tokens: Vec<Token>,
/// Index of the next token to consume.
index: usize,
}
impl Parser {
fn new(tokens: Vec<Token>) -> Self {
Self { tokens, index: 0 }
}
fn has_remaining(&self) -> bool {
self.index < self.tokens.len()
}
fn parse_expression(&mut self) -> Result<Expr, SpreadsheetArtifactError> {
let mut expr = self.parse_term()?;
while let Some(token) = self.peek() {
let op = match token {
Token::Plus => BinaryOp::Add,
Token::Minus => BinaryOp::Subtract,
_ => break,
};
self.index += 1;
let right = self.parse_term()?;
expr = Expr::Binary {
op,
left: Box::new(expr),
right: Box::new(right),
};
}
Ok(expr)
}
fn parse_term(&mut self) -> Result<Expr, SpreadsheetArtifactError> {
let mut expr = self.parse_factor()?;
while let Some(token) = self.peek() {
let op = match token {
Token::Star => BinaryOp::Multiply,
Token::Slash => BinaryOp::Divide,
_ => break,
};
self.index += 1;
let right = self.parse_factor()?;
expr = Expr::Binary {
op,
left: Box::new(expr),
right: Box::new(right),
};
}
Ok(expr)
}
fn parse_factor(&mut self) -> Result<Expr, SpreadsheetArtifactError> {
match self.peek() {
Some(Token::Minus) => {
self.index += 1;
Ok(Expr::UnaryMinus(Box::new(self.parse_factor()?)))
}
_ => self.parse_primary(),
}
}
fn parse_primary(&mut self) -> Result<Expr, SpreadsheetArtifactError> {
match self.next().cloned() {
Some(Token::Number(value)) => Ok(Expr::Number(value)),
Some(Token::Cell(address)) => {
let start = CellAddress::parse(&address)?;
if matches!(self.peek(), Some(Token::Colon)) {
self.index += 1;
let Some(Token::Cell(end)) = self.next().cloned() else {
return Err(SpreadsheetArtifactError::Formula {
location: address,
message: "expected cell after `:`".to_string(),
});
};
Ok(Expr::Range(CellRange::from_start_end(
start,
CellAddress::parse(&end)?,
)))
} else {
Ok(Expr::Cell(start))
}
}
Some(Token::Ident(name)) => {
if !matches!(self.next(), Some(Token::LParen)) {
return Err(SpreadsheetArtifactError::Formula {
location: name,
message: "expected `(` after function name".to_string(),
});
}
let mut args = Vec::new();
if !matches!(self.peek(), Some(Token::RParen)) {
loop {
args.push(self.parse_expression()?);
if matches!(self.peek(), Some(Token::Comma)) {
self.index += 1;
continue;
}
break;
}
}
if !matches!(self.next(), Some(Token::RParen)) {
return Err(SpreadsheetArtifactError::Formula {
location: name,
message: "expected `)`".to_string(),
});
}
Ok(Expr::Function { name, args })
}
Some(Token::LParen) => {
let expr = self.parse_expression()?;
if !matches!(self.next(), Some(Token::RParen)) {
return Err(SpreadsheetArtifactError::Formula {
location: format!("{expr:?}"),
message: "expected `)`".to_string(),
});
}
Ok(expr)
}
other => Err(SpreadsheetArtifactError::Formula {
location: format!("{other:?}"),
message: "unexpected token".to_string(),
}),
}
}
fn peek(&self) -> Option<&Token> {
self.tokens.get(self.index)
}
fn next(&mut self) -> Option<&Token> {
let token = self.tokens.get(self.index);
self.index += usize::from(token.is_some());
token
}
}

View File

@@ -0,0 +1,14 @@
mod address;
mod error;
mod formula;
mod manager;
mod model;
mod xlsx;
#[cfg(test)]
mod tests;
pub use address::*;
pub use error::*;
pub use manager::*;
pub use model::*;

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,302 @@
use pretty_assertions::assert_eq;
use crate::SpreadsheetArtifact;
use crate::SpreadsheetArtifactManager;
use crate::SpreadsheetArtifactRequest;
use crate::SpreadsheetCellValue;
// End-to-end check through the manager's action API: create an artifact, add
// a sheet, fill A1:B2, put a SUM formula in C1, read it back, then export.
#[test]
fn manager_can_create_edit_recalculate_and_export() -> Result<(), Box<dyn std::error::Error>> {
let temp_dir = tempfile::tempdir()?;
let mut manager = SpreadsheetArtifactManager::default();
// `create` takes no artifact id; the response supplies the new one.
let created = manager.execute(
SpreadsheetArtifactRequest {
artifact_id: None,
action: "create".to_string(),
args: serde_json::json!({ "name": "Budget" }),
},
temp_dir.path(),
)?;
let artifact_id = created.artifact_id;
manager.execute(
SpreadsheetArtifactRequest {
artifact_id: Some(artifact_id.clone()),
action: "create_sheet".to_string(),
args: serde_json::json!({ "name": "Sheet1" }),
},
temp_dir.path(),
)?;
// Fill A1:B2 with 1, 2, 3, 4.
manager.execute(
SpreadsheetArtifactRequest {
artifact_id: Some(artifact_id.clone()),
action: "set_range_values".to_string(),
args: serde_json::json!({
"sheet_name": "Sheet1",
"range": "A1:B2",
"values": [[1, 2], [3, 4]]
}),
},
temp_dir.path(),
)?;
manager.execute(
SpreadsheetArtifactRequest {
artifact_id: Some(artifact_id.clone()),
action: "set_cell_formula".to_string(),
args: serde_json::json!({
"sheet_name": "Sheet1",
"address": "C1",
"formula": "=SUM(A1:B2)",
"recalculate": true
}),
},
temp_dir.path(),
)?;
let cell = manager.execute(
SpreadsheetArtifactRequest {
artifact_id: Some(artifact_id.clone()),
action: "get_cell".to_string(),
args: serde_json::json!({
"sheet_name": "Sheet1",
"address": "C1"
}),
},
temp_dir.path(),
)?;
// 1 + 2 + 3 + 4 = 10, stored as an integer value.
assert_eq!(
cell.cell.and_then(|entry| entry.value),
Some(SpreadsheetCellValue::Integer(10))
);
let export_path = temp_dir.path().join("budget.xlsx");
let export = manager.execute(
SpreadsheetArtifactRequest {
artifact_id: Some(artifact_id),
action: "export_xlsx".to_string(),
args: serde_json::json!({ "path": export_path }),
},
temp_dir.path(),
)?;
// The export must report exactly one path and the file must exist on disk.
assert_eq!(export.exported_paths.len(), 1);
assert!(export.exported_paths[0].exists());
Ok(())
}
// Serialises an artifact to JSON and back, then checks that the string value
// in A1 survived. B1 holds a formula referencing A1 but is not asserted on.
#[test]
fn spreadsheet_serialization_roundtrip_preserves_cells() -> Result<(), Box<dyn std::error::Error>> {
let mut artifact = SpreadsheetArtifact::new(Some("Roundtrip".to_string()));
let sheet = artifact.create_sheet("Sheet1".to_string())?;
sheet.set_value(
crate::CellAddress::parse("A1")?,
Some(SpreadsheetCellValue::String("hello".to_string())),
)?;
sheet.set_formula(crate::CellAddress::parse("B1")?, Some("=A1".to_string()))?;
artifact.recalculate();
let json = artifact.to_json()?;
let restored = SpreadsheetArtifact::from_json(json, None)?;
let restored_sheet = restored.get_sheet(Some("Sheet1"), None).expect("sheet");
let cell = restored_sheet.get_cell_view(crate::CellAddress::parse("A1")?);
assert_eq!(
cell.value,
Some(SpreadsheetCellValue::String("hello".to_string()))
);
Ok(())
}
// Exports an artifact with a styled, merged A1:B1 range to XLSX, re-imports
// the file, and checks that the merge and A1's style index are still present.
#[test]
fn xlsx_roundtrip_preserves_merged_ranges_and_style_indices()
-> Result<(), Box<dyn std::error::Error>> {
let temp_dir = tempfile::tempdir()?;
let path = temp_dir.path().join("styled.xlsx");
let mut artifact = SpreadsheetArtifact::new(Some("Styled".to_string()));
let sheet = artifact.create_sheet("Sheet1".to_string())?;
sheet.set_value(
crate::CellAddress::parse("A1")?,
Some(SpreadsheetCellValue::Integer(42)),
)?;
// Apply style index 3 to A1:B1, then merge the same range.
sheet.set_style_index(&crate::CellRange::parse("A1:B1")?, 3)?;
sheet.merge_cells(&crate::CellRange::parse("A1:B1")?, true)?;
artifact.export(&path)?;
let restored = SpreadsheetArtifact::from_source_file(&path, None)?;
let restored_sheet = restored.get_sheet(Some("Sheet1"), None).expect("sheet");
assert_eq!(restored_sheet.merged_ranges.len(), 1);
assert_eq!(
restored_sheet
.get_cell_view(crate::CellAddress::parse("A1")?)
.style_index,
3
);
Ok(())
}
// Checks that an `export_xlsx` request with a relative path reports exactly
// one required path access, ending in the requested relative path.
// NOTE(review): despite the test name, only the export action is exercised
// here; no import request is checked.
#[test]
fn path_accesses_cover_import_and_export() -> Result<(), Box<dyn std::error::Error>> {
let cwd = tempfile::tempdir()?;
let request = crate::SpreadsheetArtifactRequest {
artifact_id: Some("spreadsheet_1".to_string()),
action: "export_xlsx".to_string(),
args: serde_json::json!({ "path": "out/report.xlsx" }),
};
let accesses = request.required_path_accesses(cwd.path())?;
assert_eq!(accesses.len(), 1);
assert!(accesses[0].path.ends_with("out/report.xlsx"));
Ok(())
}
// Exercises the range/cell handle API (`range_ref`, `cell_ref`) and the
// field/dictionary accessors on a sheet.
#[test]
fn sheet_refs_support_handle_and_field_apis() -> Result<(), Box<dyn std::error::Error>> {
let mut artifact = SpreadsheetArtifact::new(Some("Handles".to_string()));
// The sheet borrow is confined to this block so `artifact.recalculate()`
// can be called afterwards; the handles are returned out of the block.
let (range_ref, cell_ref) = {
let sheet = artifact.create_sheet("Sheet1".to_string())?;
let range_ref = sheet.range_ref("A1:B2")?;
range_ref.set_value(sheet, Some(SpreadsheetCellValue::Integer(7)))?;
let cell_ref = sheet.cell_ref("B2")?;
// B2 lies inside the range it sums.
cell_ref.set_formula(sheet, Some("=SUM(A1:B2)".to_string()))?;
(range_ref, cell_ref)
};
artifact.recalculate();
let sheet = artifact.get_sheet(Some("Sheet1"), None).expect("sheet");
let values = range_ref.get_values(sheet)?;
assert_eq!(values[0][0], Some(SpreadsheetCellValue::Integer(7)));
// Expected 28 = 4 x 7; presumably B2 still holds its stored 7 when the range
// is summed, since range evaluation reads stored cell values.
assert_eq!(
cell_ref.get(sheet)?.value,
Some(SpreadsheetCellValue::Integer(28))
);
// Indices (2, 2) address the same cell as B2.
assert_eq!(
sheet.get_cell_field_by_indices(2, 2, "formula")?,
Some(serde_json::Value::String("=SUM(A1:B2)".to_string()))
);
assert_eq!(
sheet.minimum_range_ref().map(|entry| entry.address),
Some("A1:B2".to_string())
);
// Only the JSON shape of `to_dict` is checked, not its contents.
assert!(matches!(
sheet.to_dict()?,
serde_json::Value::Object(_) | serde_json::Value::Array(_)
));
Ok(())
}
// Drives the manager through the uniform-value, range-formula, citation and
// lookup actions: `set_range_value`, `set_range_formula`, `cite_cell`,
// `get_cell_by_indices` and `get_cell_field`.
#[test]
fn manager_supports_single_value_formula_and_cite_cell_actions()
-> Result<(), Box<dyn std::error::Error>> {
let temp_dir = tempfile::tempdir()?;
let mut manager = SpreadsheetArtifactManager::default();
let created = manager.execute(
SpreadsheetArtifactRequest {
artifact_id: None,
action: "create".to_string(),
args: serde_json::json!({ "name": "Actions" }),
},
temp_dir.path(),
)?;
let artifact_id = created.artifact_id;
manager.execute(
SpreadsheetArtifactRequest {
artifact_id: Some(artifact_id.clone()),
action: "create_sheet".to_string(),
args: serde_json::json!({ "name": "Sheet1" }),
},
temp_dir.path(),
)?;
// Write the single value 5 into every cell of A1:B2.
let uniform = manager.execute(
SpreadsheetArtifactRequest {
artifact_id: Some(artifact_id.clone()),
action: "set_range_value".to_string(),
args: serde_json::json!({
"sheet_name": "Sheet1",
"range": "A1:B2",
"value": 5
}),
},
temp_dir.path(),
)?;
assert_eq!(
uniform
.range_ref
.as_ref()
.map(|entry| entry.address.clone()),
Some("A1:B2".to_string())
);
// Put the same SUM formula into C1 and C2 and recalculate.
manager.execute(
SpreadsheetArtifactRequest {
artifact_id: Some(artifact_id.clone()),
action: "set_range_formula".to_string(),
args: serde_json::json!({
"sheet_name": "Sheet1",
"range": "C1:C2",
"formula": "=SUM(A1:B2)",
"recalculate": true
}),
},
temp_dir.path(),
)?;
// Attach one citation to C1.
let cited = manager.execute(
SpreadsheetArtifactRequest {
artifact_id: Some(artifact_id.clone()),
action: "cite_cell".to_string(),
args: serde_json::json!({
"sheet_name": "Sheet1",
"address": "C1",
"tether_id": "source-1",
"start_line": 3,
"end_line": 8
}),
},
temp_dir.path(),
)?;
assert_eq!(
cited.cell.as_ref().map(|entry| entry.citations.len()),
Some(1)
);
// Column 3 / row 1 is C1.
let by_indices = manager.execute(
SpreadsheetArtifactRequest {
artifact_id: Some(artifact_id.clone()),
action: "get_cell_by_indices".to_string(),
args: serde_json::json!({
"sheet_name": "Sheet1",
"column_index": 3,
"row_index": 1
}),
},
temp_dir.path(),
)?;
// Four cells of 5 sum to 20.
assert_eq!(
by_indices
.cell
.as_ref()
.and_then(|entry| entry.value.clone()),
Some(SpreadsheetCellValue::Integer(20))
);
let field = manager.execute(
SpreadsheetArtifactRequest {
artifact_id: Some(artifact_id),
action: "get_cell_field".to_string(),
args: serde_json::json!({
"sheet_name": "Sheet1",
"address": "C1",
"field": "formula"
}),
},
temp_dir.path(),
)?;
assert_eq!(
field.cell_field,
Some(serde_json::Value::String("=SUM(A1:B2)".to_string()))
);
Ok(())
}

View File

@@ -0,0 +1,817 @@
use std::collections::BTreeMap;
use std::fs::File;
use std::io::Read;
use std::io::Write;
use std::path::Path;
use std::path::PathBuf;
use regex::Regex;
use zip::ZipArchive;
use zip::ZipWriter;
use zip::write::SimpleFileOptions;
use crate::CellAddress;
use crate::CellRange;
use crate::SpreadsheetArtifact;
use crate::SpreadsheetArtifactError;
use crate::SpreadsheetCell;
use crate::SpreadsheetCellValue;
use crate::SpreadsheetSheet;
pub(crate) fn write_xlsx(
artifact: &mut SpreadsheetArtifact,
path: &Path,
) -> Result<PathBuf, SpreadsheetArtifactError> {
if artifact.auto_recalculate {
artifact.recalculate();
}
let file = File::create(path).map_err(|error| SpreadsheetArtifactError::ExportFailed {
path: path.to_path_buf(),
message: error.to_string(),
})?;
let mut zip = ZipWriter::new(file);
let options = SimpleFileOptions::default();
let sheet_count = artifact.sheets.len().max(1);
zip.start_file("[Content_Types].xml", options)
.map_err(|error| SpreadsheetArtifactError::ExportFailed {
path: path.to_path_buf(),
message: error.to_string(),
})?;
zip.write_all(content_types_xml(sheet_count).as_bytes())
.map_err(|error| SpreadsheetArtifactError::ExportFailed {
path: path.to_path_buf(),
message: error.to_string(),
})?;
zip.add_directory("_rels/", options).map_err(|error| {
SpreadsheetArtifactError::ExportFailed {
path: path.to_path_buf(),
message: error.to_string(),
}
})?;
zip.start_file("_rels/.rels", options).map_err(|error| {
SpreadsheetArtifactError::ExportFailed {
path: path.to_path_buf(),
message: error.to_string(),
}
})?;
zip.write_all(root_relationships_xml().as_bytes())
.map_err(|error| SpreadsheetArtifactError::ExportFailed {
path: path.to_path_buf(),
message: error.to_string(),
})?;
zip.add_directory("docProps/", options).map_err(|error| {
SpreadsheetArtifactError::ExportFailed {
path: path.to_path_buf(),
message: error.to_string(),
}
})?;
zip.start_file("docProps/app.xml", options)
.map_err(|error| SpreadsheetArtifactError::ExportFailed {
path: path.to_path_buf(),
message: error.to_string(),
})?;
zip.write_all(app_xml(artifact).as_bytes())
.map_err(|error| SpreadsheetArtifactError::ExportFailed {
path: path.to_path_buf(),
message: error.to_string(),
})?;
zip.start_file("docProps/core.xml", options)
.map_err(|error| SpreadsheetArtifactError::ExportFailed {
path: path.to_path_buf(),
message: error.to_string(),
})?;
zip.write_all(core_xml(artifact).as_bytes())
.map_err(|error| SpreadsheetArtifactError::ExportFailed {
path: path.to_path_buf(),
message: error.to_string(),
})?;
zip.add_directory("xl/", options)
.map_err(|error| SpreadsheetArtifactError::ExportFailed {
path: path.to_path_buf(),
message: error.to_string(),
})?;
zip.start_file("xl/workbook.xml", options)
.map_err(|error| SpreadsheetArtifactError::ExportFailed {
path: path.to_path_buf(),
message: error.to_string(),
})?;
zip.write_all(workbook_xml(artifact).as_bytes())
.map_err(|error| SpreadsheetArtifactError::ExportFailed {
path: path.to_path_buf(),
message: error.to_string(),
})?;
zip.add_directory("xl/_rels/", options).map_err(|error| {
SpreadsheetArtifactError::ExportFailed {
path: path.to_path_buf(),
message: error.to_string(),
}
})?;
zip.start_file("xl/_rels/workbook.xml.rels", options)
.map_err(|error| SpreadsheetArtifactError::ExportFailed {
path: path.to_path_buf(),
message: error.to_string(),
})?;
zip.write_all(workbook_relationships_xml(artifact).as_bytes())
.map_err(|error| SpreadsheetArtifactError::ExportFailed {
path: path.to_path_buf(),
message: error.to_string(),
})?;
zip.start_file("xl/styles.xml", options).map_err(|error| {
SpreadsheetArtifactError::ExportFailed {
path: path.to_path_buf(),
message: error.to_string(),
}
})?;
zip.write_all(styles_xml(artifact).as_bytes())
.map_err(|error| SpreadsheetArtifactError::ExportFailed {
path: path.to_path_buf(),
message: error.to_string(),
})?;
zip.add_directory("xl/worksheets/", options)
.map_err(|error| SpreadsheetArtifactError::ExportFailed {
path: path.to_path_buf(),
message: error.to_string(),
})?;
if artifact.sheets.is_empty() {
let empty = SpreadsheetSheet::new("Sheet1".to_string());
zip.start_file("xl/worksheets/sheet1.xml", options)
.map_err(|error| SpreadsheetArtifactError::ExportFailed {
path: path.to_path_buf(),
message: error.to_string(),
})?;
zip.write_all(sheet_xml(&empty).as_bytes())
.map_err(|error| SpreadsheetArtifactError::ExportFailed {
path: path.to_path_buf(),
message: error.to_string(),
})?;
} else {
for (index, sheet) in artifact.sheets.iter().enumerate() {
zip.start_file(format!("xl/worksheets/sheet{}.xml", index + 1), options)
.map_err(|error| SpreadsheetArtifactError::ExportFailed {
path: path.to_path_buf(),
message: error.to_string(),
})?;
zip.write_all(sheet_xml(sheet).as_bytes())
.map_err(|error| SpreadsheetArtifactError::ExportFailed {
path: path.to_path_buf(),
message: error.to_string(),
})?;
}
}
zip.finish()
.map_err(|error| SpreadsheetArtifactError::ExportFailed {
path: path.to_path_buf(),
message: error.to_string(),
})?;
Ok(path.to_path_buf())
}
pub(crate) fn import_xlsx(
path: &Path,
artifact_id: Option<String>,
) -> Result<SpreadsheetArtifact, SpreadsheetArtifactError> {
let file = File::open(path).map_err(|error| SpreadsheetArtifactError::ImportFailed {
path: path.to_path_buf(),
message: error.to_string(),
})?;
let mut archive =
ZipArchive::new(file).map_err(|error| SpreadsheetArtifactError::ImportFailed {
path: path.to_path_buf(),
message: error.to_string(),
})?;
let workbook_xml = read_zip_entry(&mut archive, "xl/workbook.xml", path)?;
let workbook_rels = read_zip_entry(&mut archive, "xl/_rels/workbook.xml.rels", path)?;
let shared_strings = if archive.by_name("xl/sharedStrings.xml").is_ok() {
Some(parse_shared_strings(&read_zip_entry(
&mut archive,
"xl/sharedStrings.xml",
path,
)?)?)
} else {
None
};
let relationships = parse_relationships(&workbook_rels)?;
let sheets = parse_sheet_definitions(&workbook_xml)?
.into_iter()
.map(|(name, relation)| {
let target = relationships.get(&relation).ok_or_else(|| {
SpreadsheetArtifactError::ImportFailed {
path: path.to_path_buf(),
message: format!("missing relationship `{relation}` for sheet `{name}`"),
}
})?;
let normalized = if target.starts_with('/') {
target.trim_start_matches('/').to_string()
} else if target.starts_with("xl/") {
target.clone()
} else {
format!("xl/{target}")
};
Ok((name, normalized))
})
.collect::<Result<Vec<_>, SpreadsheetArtifactError>>()?;
let mut artifact = SpreadsheetArtifact::new(
path.file_stem()
.and_then(|value| value.to_str())
.map(str::to_string),
);
if let Some(artifact_id) = artifact_id {
artifact.artifact_id = artifact_id;
}
artifact.sheets.clear();
for (name, target) in sheets {
let xml = read_zip_entry(&mut archive, &target, path)?;
let sheet = parse_sheet(&name, &xml, shared_strings.as_deref())?;
artifact.sheets.push(sheet);
}
Ok(artifact)
}
/// Reads the archive member named `entry` as UTF-8 text.
///
/// `path` is only used to label errors.
///
/// # Errors
///
/// Returns [`SpreadsheetArtifactError::ImportFailed`] when the entry cannot be opened or its
/// contents cannot be read into a `String`.
fn read_zip_entry(
    archive: &mut ZipArchive<File>,
    entry: &str,
    path: &Path,
) -> Result<String, SpreadsheetArtifactError> {
    let import_failed = |message: String| SpreadsheetArtifactError::ImportFailed {
        path: path.to_path_buf(),
        message,
    };
    let mut member = match archive.by_name(entry) {
        Ok(member) => member,
        Err(error) => return Err(import_failed(error.to_string())),
    };
    let mut contents = String::new();
    match member.read_to_string(&mut contents) {
        Ok(_) => Ok(contents),
        Err(error) => Err(import_failed(error.to_string())),
    }
}
/// Extracts `(sheet name, relationship id)` pairs from the `<sheet>` elements of a workbook
/// part, in document order.
///
/// Elements without a `name` attribute are skipped. The relationship id is read from `r:id`,
/// falling back to `id`, and is an empty string when neither is present.
///
/// The name is returned exactly as produced by `extract_attribute`, which already decodes XML
/// entities; decoding a second time would corrupt names that contain a literal entity-like
/// sequence such as `&amp;`.
///
/// # Errors
///
/// Returns [`SpreadsheetArtifactError::Serialization`] if the internal pattern fails to compile.
fn parse_sheet_definitions(
    workbook_xml: &str,
) -> Result<Vec<(String, String)>, SpreadsheetArtifactError> {
    let regex = Regex::new(r#"<sheet\b([^>]*)/?>"#).map_err(|error| {
        SpreadsheetArtifactError::Serialization {
            message: error.to_string(),
        }
    })?;
    let sheets = regex
        .captures_iter(workbook_xml)
        .filter_map(|captures| {
            let attributes = captures.get(1)?.as_str();
            let name = extract_attribute(attributes, "name")?;
            let relation = extract_attribute(attributes, "r:id")
                .or_else(|| extract_attribute(attributes, "id"))
                .unwrap_or_default();
            Some((name, relation))
        })
        .collect();
    Ok(sheets)
}
/// Builds a map from relationship `Id` to `Target` for every `<Relationship>` element in `xml`.
///
/// Elements lacking either attribute are ignored. When an id occurs more than once, the last
/// occurrence wins.
///
/// # Errors
///
/// Returns [`SpreadsheetArtifactError::Serialization`] if the internal pattern fails to compile.
fn parse_relationships(xml: &str) -> Result<BTreeMap<String, String>, SpreadsheetArtifactError> {
    let relationship_regex = match Regex::new(r#"<Relationship\b([^>]*)/?>"#) {
        Ok(compiled) => compiled,
        Err(error) => {
            return Err(SpreadsheetArtifactError::Serialization {
                message: error.to_string(),
            });
        }
    };
    let mut targets_by_id = BTreeMap::new();
    for captures in relationship_regex.captures_iter(xml) {
        let Some(attributes) = captures.get(1).map(|found| found.as_str()) else {
            continue;
        };
        let Some(id) = extract_attribute(attributes, "Id") else {
            continue;
        };
        let Some(target) = extract_attribute(attributes, "Target") else {
            continue;
        };
        targets_by_id.insert(id, target);
    }
    Ok(targets_by_id)
}
fn parse_shared_strings(xml: &str) -> Result<Vec<String>, SpreadsheetArtifactError> {
let regex = Regex::new(r#"(?s)<si\b[^>]*>(.*?)</si>"#).map_err(|error| {
SpreadsheetArtifactError::Serialization {
message: error.to_string(),
}
})?;
regex
.captures_iter(xml)
.filter_map(|captures| captures.get(1).map(|value| value.as_str()))
.map(all_text_nodes)
.collect()
}
fn parse_sheet(
name: &str,
xml: &str,
shared_strings: Option<&[String]>,
) -> Result<SpreadsheetSheet, SpreadsheetArtifactError> {
let mut sheet = SpreadsheetSheet::new(name.to_string());
if let Some(sheet_view) = first_tag_attributes(xml, "sheetView")
&& let Some(show_grid_lines) = extract_attribute(&sheet_view, "showGridLines")
{
sheet.show_grid_lines = show_grid_lines != "0";
}
if let Some(format_pr) = first_tag_attributes(xml, "sheetFormatPr") {
sheet.default_row_height = extract_attribute(&format_pr, "defaultRowHeight")
.and_then(|value| value.parse::<f64>().ok());
sheet.default_column_width = extract_attribute(&format_pr, "defaultColWidth")
.and_then(|value| value.parse::<f64>().ok());
}
let col_regex = Regex::new(r#"<col\b([^>]*)/?>"#).map_err(|error| {
SpreadsheetArtifactError::Serialization {
message: error.to_string(),
}
})?;
for captures in col_regex.captures_iter(xml) {
let Some(attributes) = captures.get(1).map(|value| value.as_str()) else {
continue;
};
let Some(min) =
extract_attribute(attributes, "min").and_then(|value| value.parse::<u32>().ok())
else {
continue;
};
let Some(max) =
extract_attribute(attributes, "max").and_then(|value| value.parse::<u32>().ok())
else {
continue;
};
let Some(width) =
extract_attribute(attributes, "width").and_then(|value| value.parse::<f64>().ok())
else {
continue;
};
for column in min..=max {
sheet.column_widths.insert(column, width);
}
}
let row_regex = Regex::new(r#"(?s)<row\b[^>]*>(.*?)</row>"#).map_err(|error| {
SpreadsheetArtifactError::Serialization {
message: error.to_string(),
}
})?;
let cell_regex = Regex::new(r#"(?s)<c\b([^>]*)>(.*?)</c>"#).map_err(|error| {
SpreadsheetArtifactError::Serialization {
message: error.to_string(),
}
})?;
for row_captures in row_regex.captures_iter(xml) {
let Some(row_body) = row_captures.get(1).map(|value| value.as_str()) else {
continue;
};
for cell_captures in cell_regex.captures_iter(row_body) {
let Some(attributes) = cell_captures.get(1).map(|value| value.as_str()) else {
continue;
};
let Some(body) = cell_captures.get(2).map(|value| value.as_str()) else {
continue;
};
let Some(address) = extract_attribute(attributes, "r") else {
continue;
};
let address = CellAddress::parse(&address)?;
let style_index = extract_attribute(attributes, "s")
.and_then(|value| value.parse::<u32>().ok())
.unwrap_or(0);
let cell_type = extract_attribute(attributes, "t").unwrap_or_default();
let formula = first_tag_text(body, "f").map(|value| format!("={value}"));
let value = parse_cell_value(body, &cell_type, shared_strings)?;
let cell = SpreadsheetCell {
value,
formula,
style_index,
citations: Vec::new(),
};
if !cell.is_empty() {
sheet.cells.insert(address, cell);
}
}
}
let merge_regex = Regex::new(r#"<mergeCell\b([^>]*)/?>"#).map_err(|error| {
SpreadsheetArtifactError::Serialization {
message: error.to_string(),
}
})?;
for captures in merge_regex.captures_iter(xml) {
let Some(attributes) = captures.get(1).map(|value| value.as_str()) else {
continue;
};
if let Some(reference) = extract_attribute(attributes, "ref") {
sheet.merged_ranges.push(CellRange::parse(&reference)?);
}
}
Ok(sheet)
}
fn parse_cell_value(
body: &str,
cell_type: &str,
shared_strings: Option<&[String]>,
) -> Result<Option<SpreadsheetCellValue>, SpreadsheetArtifactError> {
let inline_text = first_tag_text(body, "t").map(|value| xml_unescape(&value));
let raw_value = first_tag_text(body, "v").map(|value| xml_unescape(&value));
let parsed = match cell_type {
"inlineStr" => inline_text.map(SpreadsheetCellValue::String),
"s" => raw_value
.and_then(|value| value.parse::<usize>().ok())
.and_then(|index| shared_strings.and_then(|entries| entries.get(index).cloned()))
.map(SpreadsheetCellValue::String),
"b" => raw_value.map(|value| SpreadsheetCellValue::Bool(value == "1")),
"str" => raw_value.map(SpreadsheetCellValue::String),
"e" => raw_value.map(SpreadsheetCellValue::Error),
_ => match raw_value {
Some(value) => {
if let Ok(integer) = value.parse::<i64>() {
Some(SpreadsheetCellValue::Integer(integer))
} else if let Ok(float) = value.parse::<f64>() {
Some(SpreadsheetCellValue::Float(float))
} else {
Some(SpreadsheetCellValue::String(value))
}
}
None => None,
},
};
Ok(parsed)
}
/// Builds the `[Content_Types].xml` document for a package with `sheet_count` worksheets.
///
/// The fixed entries (defaults for `rels` and `xml`, overrides for the workbook, styles, core
/// and app properties) are followed by one worksheet override per sheet, numbered from 1.
fn content_types_xml(sheet_count: usize) -> String {
    let mut xml = String::new();
    xml.push_str(r#"<?xml version="1.0" encoding="UTF-8" standalone="yes"?>"#);
    xml.push_str(r#"<Types xmlns="http://schemas.openxmlformats.org/package/2006/content-types">"#);
    xml.push_str(
        r#"<Default Extension="rels" ContentType="application/vnd.openxmlformats-package.relationships+xml"/>"#,
    );
    xml.push_str(r#"<Default Extension="xml" ContentType="application/xml"/>"#);
    xml.push_str(
        r#"<Override PartName="/xl/workbook.xml" ContentType="application/vnd.openxmlformats-officedocument.spreadsheetml.sheet.main+xml"/>"#,
    );
    xml.push_str(
        r#"<Override PartName="/xl/styles.xml" ContentType="application/vnd.openxmlformats-officedocument.spreadsheetml.styles+xml"/>"#,
    );
    xml.push_str(
        r#"<Override PartName="/docProps/core.xml" ContentType="application/vnd.openxmlformats-package.core-properties+xml"/>"#,
    );
    xml.push_str(
        r#"<Override PartName="/docProps/app.xml" ContentType="application/vnd.openxmlformats-officedocument.extended-properties+xml"/>"#,
    );
    for sheet_number in 1..=sheet_count {
        xml.push_str(&format!(
            r#"<Override PartName="/xl/worksheets/sheet{sheet_number}.xml" ContentType="application/vnd.openxmlformats-officedocument.spreadsheetml.worksheet+xml"/>"#
        ));
    }
    xml.push_str(r#"</Types>"#);
    xml
}
/// Returns the fixed package-level relationships document (`_rels/.rels`).
///
/// It links the package to `xl/workbook.xml`, `docProps/core.xml` and `docProps/app.xml` under
/// the ids `rId1`, `rId2` and `rId3`.
fn root_relationships_xml() -> &'static str {
    const ROOT_RELATIONSHIPS: &str = concat!(
        r#"<?xml version="1.0" encoding="UTF-8" standalone="yes"?>"#,
        r#"<Relationships xmlns="http://schemas.openxmlformats.org/package/2006/relationships">"#,
        r#"<Relationship Id="rId1" Type="http://schemas.openxmlformats.org/officeDocument/2006/relationships/officeDocument" Target="xl/workbook.xml"/>"#,
        r#"<Relationship Id="rId2" Type="http://schemas.openxmlformats.org/package/2006/relationships/metadata/core-properties" Target="docProps/core.xml"/>"#,
        r#"<Relationship Id="rId3" Type="http://schemas.openxmlformats.org/officeDocument/2006/relationships/extended-properties" Target="docProps/app.xml"/>"#,
        r#"</Relationships>"#
    );
    ROOT_RELATIONSHIPS
}
/// Builds `docProps/app.xml` (extended properties) for `artifact`.
///
/// The worksheet count and the list of part titles describe the sheets that are actually
/// written to the package: an artifact without sheets is exported with a single placeholder
/// `Sheet1` (see `workbook_xml`), so the same placeholder is listed here instead of declaring
/// zero worksheets. The `Manager` element carries the artifact name, or `Spreadsheet` when the
/// artifact is unnamed.
fn app_xml(artifact: &SpreadsheetArtifact) -> String {
    let title = artifact.name.as_deref().unwrap_or("Spreadsheet");
    let sheet_titles: Vec<String> = if artifact.sheets.is_empty() {
        vec![r#"<vt:lpstr>Sheet1</vt:lpstr>"#.to_string()]
    } else {
        artifact
            .sheets
            .iter()
            .map(|sheet| format!(r#"<vt:lpstr>{}</vt:lpstr>"#, xml_escape(&sheet.name)))
            .collect()
    };
    format!(
        concat!(
            r#"<?xml version="1.0" encoding="UTF-8" standalone="yes"?>"#,
            r#"<Properties xmlns="http://schemas.openxmlformats.org/officeDocument/2006/extended-properties" xmlns:vt="http://schemas.openxmlformats.org/officeDocument/2006/docPropsVTypes">"#,
            r#"<Application>Codex</Application>"#,
            r#"<DocSecurity>0</DocSecurity>"#,
            r#"<ScaleCrop>false</ScaleCrop>"#,
            r#"<HeadingPairs><vt:vector size="2" baseType="variant"><vt:variant><vt:lpstr>Worksheets</vt:lpstr></vt:variant><vt:variant><vt:i4>{}</vt:i4></vt:variant></vt:vector></HeadingPairs>"#,
            r#"<TitlesOfParts><vt:vector size="{}" baseType="lpstr">{}</vt:vector></TitlesOfParts>"#,
            r#"<Company>OpenAI</Company>"#,
            r#"<Manager>{}</Manager>"#,
            r#"</Properties>"#
        ),
        sheet_titles.len(),
        sheet_titles.len(),
        sheet_titles.concat(),
        xml_escape(title),
    )
}
/// Builds `docProps/core.xml` (core properties) for `artifact`.
///
/// The title is the artifact name, falling back to the artifact id; creator and last modifier
/// are both `Codex`.
fn core_xml(artifact: &SpreadsheetArtifact) -> String {
    let title = match &artifact.name {
        Some(name) => name.clone(),
        None => artifact.artifact_id.clone(),
    };
    let mut xml = String::new();
    xml.push_str(r#"<?xml version="1.0" encoding="UTF-8" standalone="yes"?>"#);
    xml.push_str(
        r#"<cp:coreProperties xmlns:cp="http://schemas.openxmlformats.org/package/2006/metadata/core-properties" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:dcterms="http://purl.org/dc/terms/" xmlns:dcmitype="http://purl.org/dc/dcmitype/" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance">"#,
    );
    xml.push_str(&format!(r#"<dc:title>{}</dc:title>"#, xml_escape(&title)));
    xml.push_str(r#"<dc:creator>Codex</dc:creator>"#);
    xml.push_str(r#"<cp:lastModifiedBy>Codex</cp:lastModifiedBy>"#);
    xml.push_str(r#"</cp:coreProperties>"#);
    xml
}
/// Builds `xl/workbook.xml`, listing every sheet with `sheetId` and `r:id` numbered from 1.
///
/// An artifact without sheets gets a single `Sheet1` entry bound to `rId1`.
fn workbook_xml(artifact: &SpreadsheetArtifact) -> String {
    let mut sheet_entries = String::new();
    if artifact.sheets.is_empty() {
        sheet_entries.push_str(r#"<sheet name="Sheet1" sheetId="1" r:id="rId1"/>"#);
    }
    for (position, sheet) in artifact.sheets.iter().enumerate() {
        let number = position + 1;
        sheet_entries.push_str(&format!(
            r#"<sheet name="{}" sheetId="{number}" r:id="rId{number}"/>"#,
            xml_escape(&sheet.name)
        ));
    }

    let mut xml = String::new();
    xml.push_str(r#"<?xml version="1.0" encoding="UTF-8" standalone="yes"?>"#);
    xml.push_str(
        r#"<workbook xmlns="http://schemas.openxmlformats.org/spreadsheetml/2006/main" xmlns:r="http://schemas.openxmlformats.org/officeDocument/2006/relationships">"#,
    );
    xml.push_str(r#"<bookViews><workbookView/></bookViews>"#);
    xml.push_str("<sheets>");
    xml.push_str(&sheet_entries);
    xml.push_str("</sheets>");
    xml.push_str(r#"</workbook>"#);
    xml
}
/// Builds `xl/_rels/workbook.xml.rels`.
///
/// Worksheets take `rId1..rIdN` pointing at `worksheets/sheetN.xml` (a single `rId1` entry when
/// the artifact has no sheets); the styles part takes the next id after the last worksheet.
fn workbook_relationships_xml(artifact: &SpreadsheetArtifact) -> String {
    let mut xml = String::new();
    xml.push_str(r#"<?xml version="1.0" encoding="UTF-8" standalone="yes"?>"#);
    xml.push_str(
        r#"<Relationships xmlns="http://schemas.openxmlformats.org/package/2006/relationships">"#,
    );
    if artifact.sheets.is_empty() {
        xml.push_str(
            r#"<Relationship Id="rId1" Type="http://schemas.openxmlformats.org/officeDocument/2006/relationships/worksheet" Target="worksheets/sheet1.xml"/>"#,
        );
    }
    for number in 1..=artifact.sheets.len() {
        xml.push_str(&format!(
            r#"<Relationship Id="rId{number}" Type="http://schemas.openxmlformats.org/officeDocument/2006/relationships/worksheet" Target="worksheets/sheet{number}.xml"/>"#
        ));
    }
    let style_relation_id = artifact.sheets.len().max(1) + 1;
    xml.push_str(&format!(
        r#"<Relationship Id="rId{style_relation_id}" Type="http://schemas.openxmlformats.org/officeDocument/2006/relationships/styles" Target="styles.xml"/>"#
    ));
    xml.push_str(r#"</Relationships>"#);
    xml
}
/// Builds a minimal `xl/styles.xml`.
///
/// One identical default `xf` record is emitted for every style index from 0 up to the highest
/// `style_index` used by any cell, so each referenced index exists in `cellXfs`.
fn styles_xml(artifact: &SpreadsheetArtifact) -> String {
    let mut max_style_index = 0;
    for sheet in &artifact.sheets {
        for cell in sheet.cells.values() {
            if cell.style_index > max_style_index {
                max_style_index = cell.style_index;
            }
        }
    }

    let mut cell_xfs = String::new();
    for _ in 0..=max_style_index {
        cell_xfs.push_str(r#"<xf numFmtId="0" fontId="0" fillId="0" borderId="0" xfId="0"/>"#);
    }
    let xf_count = max_style_index + 1;

    let mut xml = String::new();
    xml.push_str(r#"<?xml version="1.0" encoding="UTF-8" standalone="yes"?>"#);
    xml.push_str(r#"<styleSheet xmlns="http://schemas.openxmlformats.org/spreadsheetml/2006/main">"#);
    xml.push_str(r#"<fonts count="1"><font/></fonts>"#);
    xml.push_str(
        r#"<fills count="2"><fill><patternFill patternType="none"/></fill><fill><patternFill patternType="gray125"/></fill></fills>"#,
    );
    xml.push_str(r#"<borders count="1"><border/></borders>"#);
    xml.push_str(
        r#"<cellStyleXfs count="1"><xf numFmtId="0" fontId="0" fillId="0" borderId="0"/></cellStyleXfs>"#,
    );
    xml.push_str(&format!(r#"<cellXfs count="{xf_count}">{cell_xfs}</cellXfs>"#));
    xml.push_str(
        r#"<cellStyles count="1"><cellStyle name="Normal" xfId="0" builtinId="0"/></cellStyles>"#,
    );
    xml.push_str(r#"</styleSheet>"#);
    xml
}
/// Builds the worksheet part for `sheet`.
///
/// Cells are grouped into rows ordered by row index and, within a row, by column. Adjacent
/// columns whose widths differ by less than `f64::EPSILON` are merged into one `<col>` range.
/// Missing defaults fall back to a row height of `15.0` and a column width of `8.43`. The
/// `<cols>` and `<mergeCells>` elements are omitted when there is nothing to put in them.
fn sheet_xml(sheet: &SpreadsheetSheet) -> String {
    let mut cells_by_row = BTreeMap::<u32, Vec<(CellAddress, &SpreadsheetCell)>>::new();
    for (address, cell) in &sheet.cells {
        cells_by_row
            .entry(address.row)
            .or_default()
            .push((*address, cell));
    }
    let mut sheet_data = String::new();
    for (row_index, mut row_cells) in cells_by_row {
        row_cells.sort_by_key(|(address, _)| address.column);
        let mut cells = String::new();
        for (address, cell) in row_cells {
            cells.push_str(&cell_xml(address, cell));
        }
        sheet_data.push_str(&format!(r#"<row r="{row_index}">{cells}</row>"#));
    }

    let mut cols = String::new();
    if !sheet.column_widths.is_empty() {
        cols.push_str("<cols>");
        let mut widths = sheet.column_widths.iter().peekable();
        while let Some((&first, &width)) = widths.next() {
            // Extend the run while the next column is adjacent and has the same width.
            let mut last = first;
            while let Some(&(&candidate, &candidate_width)) = widths.peek() {
                let adjacent = candidate == last + 1;
                let same_width = (candidate_width - width).abs() < f64::EPSILON;
                if !(adjacent && same_width) {
                    break;
                }
                last = candidate;
                widths.next();
            }
            cols.push_str(&format!(
                r#"<col min="{first}" max="{last}" width="{width}" customWidth="1"/>"#
            ));
        }
        cols.push_str("</cols>");
    }

    let mut merge_cells = String::new();
    if !sheet.merged_ranges.is_empty() {
        merge_cells.push_str(&format!(
            r#"<mergeCells count="{}">"#,
            sheet.merged_ranges.len()
        ));
        for range in &sheet.merged_ranges {
            merge_cells.push_str(&format!(r#"<mergeCell ref="{}"/>"#, range.to_a1()));
        }
        merge_cells.push_str("</mergeCells>");
    }

    let default_row_height = sheet.default_row_height.unwrap_or(15.0);
    let default_column_width = sheet.default_column_width.unwrap_or(8.43);
    let grid_lines = match sheet.show_grid_lines {
        true => "1",
        false => "0",
    };

    let mut xml = String::new();
    xml.push_str(r#"<?xml version="1.0" encoding="UTF-8" standalone="yes"?>"#);
    xml.push_str(r#"<worksheet xmlns="http://schemas.openxmlformats.org/spreadsheetml/2006/main">"#);
    xml.push_str(&format!(
        r#"<sheetViews><sheetView workbookViewId="0" showGridLines="{grid_lines}"/></sheetViews>"#
    ));
    xml.push_str(&format!(
        r#"<sheetFormatPr defaultRowHeight="{default_row_height}" defaultColWidth="{default_column_width}"/>"#
    ));
    xml.push_str(&cols);
    xml.push_str("<sheetData>");
    xml.push_str(&sheet_data);
    xml.push_str("</sheetData>");
    xml.push_str(&merge_cells);
    xml.push_str(r#"</worksheet>"#);
    xml
}
fn cell_xml(address: CellAddress, cell: &SpreadsheetCell) -> String {
let style = if cell.style_index == 0 {
String::new()
} else {
format!(r#" s="{}""#, cell.style_index)
};
if let Some(formula) = &cell.formula {
let formula = xml_escape(formula.trim_start_matches('='));
let value_xml = match &cell.value {
Some(SpreadsheetCellValue::Bool(value)) => {
format!(
r#" t="b"><f>{formula}</f><v>{}</v></c>"#,
usize::from(*value)
)
}
Some(SpreadsheetCellValue::Integer(value)) => {
format!(r#"><f>{formula}</f><v>{value}</v></c>"#)
}
Some(SpreadsheetCellValue::Float(value)) => {
format!(r#"><f>{formula}</f><v>{value}</v></c>"#)
}
Some(SpreadsheetCellValue::String(value))
| Some(SpreadsheetCellValue::DateTime(value)) => format!(
r#" t="str"><f>{formula}</f><v>{}</v></c>"#,
xml_escape(value)
),
Some(SpreadsheetCellValue::Error(value)) => {
format!(r#" t="e"><f>{formula}</f><v>{}</v></c>"#, xml_escape(value))
}
None => format!(r#"><f>{formula}</f></c>"#),
};
return format!(r#"<c r="{}"{style}{value_xml}"#, address.to_a1());
}
match &cell.value {
Some(SpreadsheetCellValue::Bool(value)) => format!(
r#"<c r="{}"{style} t="b"><v>{}</v></c>"#,
address.to_a1(),
usize::from(*value)
),
Some(SpreadsheetCellValue::Integer(value)) => {
format!(r#"<c r="{}"{style}><v>{value}</v></c>"#, address.to_a1())
}
Some(SpreadsheetCellValue::Float(value)) => {
format!(r#"<c r="{}"{style}><v>{value}</v></c>"#, address.to_a1())
}
Some(SpreadsheetCellValue::String(value)) | Some(SpreadsheetCellValue::DateTime(value)) => {
format!(
r#"<c r="{}"{style} t="inlineStr"><is><t>{}</t></is></c>"#,
address.to_a1(),
xml_escape(value)
)
}
Some(SpreadsheetCellValue::Error(value)) => format!(
r#"<c r="{}"{style} t="e"><v>{}</v></c>"#,
address.to_a1(),
xml_escape(value)
),
None => format!(r#"<c r="{}"{style}/>"#, address.to_a1()),
}
}
/// Returns the raw attribute text of the first `<tag ...>` element in `xml`.
///
/// The result is everything between the tag name and the closing `>`; for a self-closing
/// element this includes the trailing `/`. Returns `None` when the tag does not occur or the
/// pattern built from `tag` does not compile.
fn first_tag_attributes(xml: &str, tag: &str) -> Option<String> {
    let pattern = format!(r#"<{tag}\b([^>]*)/?>"#);
    let tag_regex = Regex::new(&pattern).ok()?;
    match tag_regex.captures(xml) {
        Some(found) => found.get(1).map(|attributes| attributes.as_str().to_string()),
        None => None,
    }
}
/// Returns the raw (still XML-escaped) text between the first `<tag ...>` and its `</tag>`.
///
/// Returns `None` when the tag does not occur, when the pattern built from `tag` does not
/// compile, or when the first occurrence is self-closing (`<tag .../>`). The self-closing form
/// is matched explicitly so that it cannot be mistaken for an opening tag and capture
/// everything up to the closing tag of a later sibling.
fn first_tag_text(xml: &str, tag: &str) -> Option<String> {
    // `/>` is tried before `>` at each position, so a self-closing element ends the match
    // without a body capture.
    let regex = Regex::new(&format!(r#"(?s)<{tag}\b[^>]*?(?:/>|>(.*?)</{tag}>)"#)).ok()?;
    let captures = regex.captures(xml)?;
    captures.get(1).map(|value| value.as_str().to_string())
}
/// Concatenates the XML-unescaped contents of every `<t>` element in `xml`, in document order.
///
/// Returns an empty string when there are no `<t>` elements.
///
/// # Errors
///
/// Returns [`SpreadsheetArtifactError::Serialization`] if the internal pattern fails to compile.
fn all_text_nodes(xml: &str) -> Result<String, SpreadsheetArtifactError> {
    let text_regex = match Regex::new(r#"(?s)<t\b[^>]*>(.*?)</t>"#) {
        Ok(compiled) => compiled,
        Err(error) => {
            return Err(SpreadsheetArtifactError::Serialization {
                message: error.to_string(),
            });
        }
    };
    let mut text = String::new();
    for captures in text_regex.captures_iter(xml) {
        if let Some(node) = captures.get(1) {
            text.push_str(&xml_unescape(node.as_str()));
        }
    }
    Ok(text)
}
/// Returns the XML-unescaped value of attribute `name` within a tag's attribute text.
///
/// The name must start at the beginning of `attributes`, after whitespace, or after a
/// namespace-prefix colon, so a short name such as `r` can no longer match the tail of a longer
/// attribute name. Matching after a colon keeps an unprefixed lookup such as `id` working for
/// documents that bind the relationships namespace to a prefix other than `r`. Whitespace
/// around `=` and both double- and single-quoted values are accepted.
///
/// `name` is interpolated into a regular expression as-is, so it must not contain regex
/// metacharacters. Returns `None` when the attribute is absent or the pattern does not compile.
fn extract_attribute(attributes: &str, name: &str) -> Option<String> {
    let pattern = format!(r#"(?:^|[\s:]){name}\s*=\s*(?:"([^"]*)"|'([^']*)')"#);
    let regex = Regex::new(&pattern).ok()?;
    let captures = regex.captures(attributes)?;
    // Group 1 holds a double-quoted value, group 2 a single-quoted one.
    let value = captures.get(1).or_else(|| captures.get(2))?;
    Some(xml_unescape(value.as_str()))
}
/// Escapes the five XML special characters (`&`, `<`, `>`, `"`, `'`) in `value`.
///
/// All other characters are copied through unchanged.
fn xml_escape(value: &str) -> String {
    let mut escaped = String::with_capacity(value.len());
    for character in value.chars() {
        match character {
            '&' => escaped.push_str("&amp;"),
            '<' => escaped.push_str("&lt;"),
            '>' => escaped.push_str("&gt;"),
            '"' => escaped.push_str("&quot;"),
            '\'' => escaped.push_str("&apos;"),
            other => escaped.push(other),
        }
    }
    escaped
}
/// Decodes XML entity references in `value`.
///
/// Handles the five predefined entities (`&amp;`, `&lt;`, `&gt;`, `&quot;`, `&apos;`) and
/// numeric character references in decimal (`&#10;`) or hexadecimal (`&#x41;`) form. The input
/// is decoded in a single left-to-right pass, so text produced by decoding is never decoded
/// again (`&amp;lt;` becomes `&lt;`). Anything that is not a recognised reference, including a
/// bare `&` or a numeric reference that is not a valid Unicode scalar value, is copied through
/// unchanged.
fn xml_unescape(value: &str) -> String {
    // Longest reference considered, counted in bytes after the `&`. Bounding the search keeps
    // inputs with many stray ampersands linear.
    const MAX_REFERENCE_LEN: usize = 16;

    let mut output = String::with_capacity(value.len());
    let mut rest = value;
    while let Some(start) = rest.find('&') {
        output.push_str(&rest[..start]);
        let candidate = &rest[start..];
        let decoded = candidate
            .bytes()
            .take(MAX_REFERENCE_LEN + 1)
            .position(|byte| byte == b';')
            .and_then(|end| {
                // `&` and `;` are ASCII, so both slice bounds are character boundaries.
                let entity = &candidate[1..end];
                let character = match entity {
                    "amp" => '&',
                    "lt" => '<',
                    "gt" => '>',
                    "quot" => '"',
                    "apos" => '\'',
                    _ => {
                        let reference = entity.strip_prefix('#')?;
                        let (digits, radix) = match reference.strip_prefix('x') {
                            Some(hex) => (hex, 16),
                            None => (reference, 10),
                        };
                        // Reject signs and other characters the integer parser would accept.
                        if digits.is_empty() || !digits.chars().all(|digit| digit.is_digit(radix))
                        {
                            return None;
                        }
                        char::from_u32(u32::from_str_radix(digits, radix).ok()?)?
                    }
                };
                Some((character, end + 1))
            });
        match decoded {
            Some((character, consumed)) => {
                output.push(character);
                rest = &candidate[consumed..];
            }
            None => {
                output.push('&');
                rest = &candidate[1..];
            }
        }
    }
    output.push_str(rest);
    output
}