mirror of
https://github.com/openai/codex.git
synced 2026-04-24 22:54:54 +00:00
nit 2
This commit is contained in:
@@ -6466,6 +6466,17 @@ async fn try_run_sampling_request(
|
||||
}
|
||||
};
|
||||
|
||||
flush_citation_segments_all(
|
||||
&sess,
|
||||
&turn_context,
|
||||
plan_mode_state.as_mut(),
|
||||
&mut assistant_message_stream_parsers,
|
||||
)
|
||||
.await;
|
||||
if let Some(state) = plan_mode_state.as_mut() {
|
||||
flush_proposed_plan_segments_all(&sess, &turn_context, state).await;
|
||||
}
|
||||
|
||||
drain_in_flight(&mut in_flight, sess.clone(), turn_context.clone()).await?;
|
||||
|
||||
if should_emit_turn_diff {
|
||||
|
||||
@@ -227,7 +227,7 @@ pub(crate) fn last_assistant_message_from_item(
|
||||
}
|
||||
let stripped = strip_hidden_assistant_markup(&combined, plan_mode);
|
||||
if stripped.trim().is_empty() {
|
||||
return Some(String::new());
|
||||
return None;
|
||||
}
|
||||
return Some(stripped);
|
||||
}
|
||||
@@ -320,12 +320,16 @@ mod tests {
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn last_assistant_message_from_item_returns_empty_string_for_citation_only_message() {
|
||||
fn last_assistant_message_from_item_returns_none_for_citation_only_message() {
|
||||
let item = assistant_output_text("<citation>doc1</citation>");
|
||||
|
||||
let message = last_assistant_message_from_item(&item, false)
|
||||
.expect("assistant item should still count as latest message");
|
||||
assert_eq!(last_assistant_message_from_item(&item, false), None);
|
||||
}
|
||||
|
||||
assert_eq!(message, "");
|
||||
#[test]
fn last_assistant_message_from_item_returns_none_for_plan_only_hidden_message() {
    // The whole message is a single `<proposed_plan>` block; with plan mode enabled the
    // lookup is expected to yield no message at all.
    let plan_only = assistant_output_text("<proposed_plan>\n- x\n</proposed_plan>");
    let latest = last_assistant_message_from_item(&plan_only, true);

    assert_eq!(latest, None);
}
|
||||
}
|
||||
|
||||
@@ -92,5 +92,9 @@ assert_eq!(out.extracted[0].content, "x");
|
||||
- No tag attributes
|
||||
- No nested tag support
|
||||
- Unterminated open tags are auto-closed on `finish()` (buffered content is returned as extracted)
|
||||
- `Utf8StreamParser::push_bytes(...)` rolls back the entire pushed chunk on invalid UTF-8 so the
|
||||
wrapped parser does not observe a partial prefix from that chunk
|
||||
- `Utf8StreamParser::into_inner()` returns an error if undecoded UTF-8 bytes are still buffered;
|
||||
use `into_inner_lossy()` only if you intentionally want to drop buffered partial bytes
|
||||
- `StreamTextParser::push_str(...)` accepts only valid UTF-8 (`&str`); use `Utf8StreamParser` if your
|
||||
upstream source yields raw bytes
|
||||
|
||||
@@ -15,6 +15,9 @@ const CITATION_CLOSE: &str = "</citation>";
|
||||
///
|
||||
/// This is a thin convenience wrapper around [`InlineHiddenTagParser`]. It returns citation bodies
|
||||
/// as plain strings and omits the citation tags from visible text.
|
||||
///
|
||||
/// Matching is literal and non-nested. If EOF is reached before a closing `</citation>`, the
|
||||
/// parser auto-closes the tag and returns the buffered body as an extracted citation.
|
||||
#[derive(Debug)]
|
||||
pub struct CitationStreamParser {
|
||||
inner: InlineHiddenTagParser<CitationTag>,
|
||||
@@ -59,6 +62,9 @@ impl StreamTextParser for CitationStreamParser {
|
||||
}
|
||||
|
||||
/// Strip citation tags from a complete string and return `(visible_text, citations)`.
|
||||
///
|
||||
/// This uses [`CitationStreamParser`] internally, so it inherits the same semantics:
|
||||
/// literal, non-nested matching and auto-closing unterminated citations at EOF.
|
||||
pub fn strip_citations(text: &str) -> (String, Vec<String>) {
|
||||
let mut parser = CitationStreamParser::new();
|
||||
let mut out = parser.push_str(text);
|
||||
@@ -146,4 +152,21 @@ mod tests {
|
||||
assert_eq!(visible, "abc");
|
||||
assert_eq!(citations, vec!["one".to_string(), "two".to_string()]);
|
||||
}
|
||||
|
||||
#[test]
fn strip_citations_auto_closes_unterminated_citation_at_eof() {
    // The input opens a citation but never closes it; the text after the opener is
    // expected back as a citation rather than as visible text.
    let (shown, extracted) = strip_citations("x<citation>y");

    assert_eq!(shown, "x");
    assert_eq!(extracted, vec![String::from("y")]);
}
|
||||
|
||||
#[test]
fn citation_parser_does_not_support_nested_tags() {
    let input = "a<citation>x<citation>y</citation>z</citation>b";
    let (shown, extracted) = strip_citations(input);

    // The first `</citation>` ends the outer citation, so the inner opener is captured
    // literally and the trailing closer is left in the visible text.
    assert_eq!(shown, "az</citation>b");
    assert_eq!(extracted, vec![String::from("x<citation>y")]);
}
|
||||
}
|
||||
|
||||
@@ -52,6 +52,16 @@ where
|
||||
!specs.is_empty(),
|
||||
"InlineHiddenTagParser requires at least one tag spec"
|
||||
);
|
||||
for spec in &specs {
|
||||
assert!(
|
||||
!spec.open.is_empty(),
|
||||
"InlineHiddenTagParser requires non-empty open delimiters"
|
||||
);
|
||||
assert!(
|
||||
!spec.close.is_empty(),
|
||||
"InlineHiddenTagParser requires non-empty close delimiters"
|
||||
);
|
||||
}
|
||||
Self {
|
||||
specs,
|
||||
pending: String::new(),
|
||||
@@ -290,4 +300,24 @@ mod tests {
|
||||
assert_eq!(out.extracted[0].tag, Tag::B);
|
||||
assert_eq!(out.extracted[0].content, "y");
|
||||
}
|
||||
|
||||
#[test]
#[should_panic(expected = "non-empty open delimiters")]
fn generic_inline_parser_rejects_empty_open_delimiter() {
    // Construction itself must panic: the spec's opening delimiter is the empty string.
    let empty_open = InlineTagSpec {
        tag: Tag::A,
        open: "",
        close: "</a>",
    };
    let _ = InlineHiddenTagParser::new(vec![empty_open]);
}
|
||||
|
||||
#[test]
#[should_panic(expected = "non-empty close delimiters")]
fn generic_inline_parser_rejects_empty_close_delimiter() {
    // Construction itself must panic: the spec's closing delimiter is the empty string.
    let empty_close = InlineTagSpec {
        tag: Tag::A,
        open: "<a>",
        close: "",
    };
    let _ = InlineHiddenTagParser::new(vec![empty_close]);
}
|
||||
}
|
||||
|
||||
@@ -1,11 +1,3 @@
|
||||
//! Streaming parsers for text that arrives in chunks.
|
||||
//!
|
||||
//! This crate is intentionally small and dependency-free. It provides:
|
||||
//! - a generic [`StreamTextParser`] trait for incremental text parsers, and
|
||||
//! - reusable parsers for hidden inline tags such as `<citation>...</citation>`.
|
||||
//!
|
||||
//! See the crate `README.md` for usage examples and extension guidance.
|
||||
|
||||
mod citation;
|
||||
mod inline_hidden_tag;
|
||||
mod stream_text;
|
||||
|
||||
@@ -58,6 +58,11 @@ where
|
||||
}
|
||||
}
|
||||
|
||||
/// Feed a raw byte chunk.
|
||||
///
|
||||
/// If the chunk contains invalid UTF-8, this returns an error and rolls back the entire
|
||||
/// pushed chunk so callers can decide how to recover without the inner parser seeing a partial
|
||||
/// prefix from that chunk.
|
||||
pub fn push_bytes(
|
||||
&mut self,
|
||||
chunk: &[u8],
|
||||
@@ -143,7 +148,31 @@ where
|
||||
Ok(out)
|
||||
}
|
||||
|
||||
pub fn into_inner(self) -> P {
|
||||
/// Return the wrapped parser if no undecoded UTF-8 bytes are buffered.
|
||||
///
|
||||
/// Use [`Self::finish`] first if you want to flush buffered text into the wrapped parser.
|
||||
pub fn into_inner(self) -> Result<P, Utf8StreamParserError> {
|
||||
if self.pending_utf8.is_empty() {
|
||||
return Ok(self.inner);
|
||||
}
|
||||
match std::str::from_utf8(&self.pending_utf8) {
|
||||
Ok(_) => Ok(self.inner),
|
||||
Err(err) => {
|
||||
if let Some(error_len) = err.error_len() {
|
||||
return Err(Utf8StreamParserError::InvalidUtf8 {
|
||||
valid_up_to: err.valid_up_to(),
|
||||
error_len,
|
||||
});
|
||||
}
|
||||
Err(Utf8StreamParserError::IncompleteUtf8AtEof)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Return the wrapped parser without validating or flushing buffered undecoded bytes.
|
||||
///
|
||||
/// This may drop a partial UTF-8 code point that was buffered across chunk boundaries.
|
||||
pub fn into_inner_lossy(self) -> P {
|
||||
self.inner
|
||||
}
|
||||
}
|
||||
@@ -154,6 +183,7 @@ mod tests {
|
||||
use super::Utf8StreamParserError;
|
||||
use crate::CitationStreamParser;
|
||||
use crate::StreamTextChunk;
|
||||
use crate::StreamTextParser;
|
||||
|
||||
use pretty_assertions::assert_eq;
|
||||
|
||||
@@ -223,6 +253,31 @@ mod tests {
|
||||
assert!(tail.is_empty());
|
||||
}
|
||||
|
||||
#[test]
fn utf8_stream_parser_rolls_back_entire_chunk_when_invalid_byte_follows_valid_prefix() {
    let mut parser = Utf8StreamParser::new(CitationStreamParser::new());

    // "ok" is valid, but the trailing 0xFF is not, so the push must fail.
    let rejection = match parser.push_bytes(b"ok\xFF") {
        Err(err) => err,
        Ok(out) => panic!("invalid byte should error, got output: {out:?}"),
    };
    let expected = Utf8StreamParserError::InvalidUtf8 {
        valid_up_to: 2,
        error_len: 1,
    };
    assert_eq!(rejection, expected);

    // After the rollback, the next chunk yields only its own text; nothing from "ok" shows up.
    let recovered = match parser.push_bytes(b"!") {
        Err(err) => panic!("parser should recover after rollback: {err}"),
        Ok(out) => out,
    };

    assert_eq!(recovered.visible_text, "!");
    assert!(recovered.extracted.is_empty());
}
|
||||
|
||||
#[test]
|
||||
fn utf8_stream_parser_errors_on_incomplete_code_point_at_eof() {
|
||||
let mut parser = Utf8StreamParser::new(CitationStreamParser::new());
|
||||
@@ -239,4 +294,36 @@ mod tests {
|
||||
};
|
||||
assert_eq!(err, Utf8StreamParserError::IncompleteUtf8AtEof);
|
||||
}
|
||||
|
||||
#[test]
fn utf8_stream_parser_into_inner_errors_when_partial_code_point_is_buffered() {
    let mut parser = Utf8StreamParser::new(CitationStreamParser::new());

    // 0xC3 is the lead byte of a two-byte sequence; on its own it produces no output yet.
    let buffered = match parser.push_bytes(&[0xC3]) {
        Err(err) => panic!("partial code point should be buffered: {err}"),
        Ok(out) => out,
    };
    assert!(buffered.is_empty());

    // Unwrapping while that byte is still pending must be refused.
    let refusal = match parser.into_inner() {
        Err(err) => err,
        Ok(_) => panic!("buffered partial code point should be rejected"),
    };
    assert_eq!(refusal, Utf8StreamParserError::IncompleteUtf8AtEof);
}
|
||||
|
||||
#[test]
fn utf8_stream_parser_into_inner_lossy_drops_buffered_partial_code_point() {
    let mut parser = Utf8StreamParser::new(CitationStreamParser::new());

    // 0xC3 is the lead byte of a two-byte sequence; on its own it produces no output yet.
    let buffered = match parser.push_bytes(&[0xC3]) {
        Err(err) => panic!("partial code point should be buffered: {err}"),
        Ok(out) => out,
    };
    assert!(buffered.is_empty());

    // The lossy unwrap discards the pending byte, so the inner parser has nothing to flush.
    let mut unwrapped = parser.into_inner_lossy();
    let remainder = unwrapped.finish();
    assert!(remainder.is_empty());
}
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user