diff --git a/docs/tools/file-system.md b/docs/tools/file-system.md index b2afdc8065..bb521dba52 100644 --- a/docs/tools/file-system.md +++ b/docs/tools/file-system.md @@ -36,8 +36,9 @@ glob patterns. ## 2. `read_file` (ReadFile) `read_file` reads and returns the content of a specified file. This tool handles -text, images (PNG, JPG, GIF, WEBP, SVG, BMP), and PDF files. For text files, it -can read specific line ranges. Other binary file types are generally skipped. +text, images (PNG, JPG, GIF, WEBP, SVG, BMP), audio files (MP3, WAV, AIFF, AAC, +OGG, FLAC), and PDF files. For text files, it can read specific line ranges. +Other binary file types are generally skipped. - **Tool name:** `read_file` - **Display name:** ReadFile @@ -53,16 +54,16 @@ can read specific line ranges. Other binary file types are generally skipped. - For text files: Returns the content. If `offset` and `limit` are used, returns only that slice of lines. Indicates if content was truncated due to line limits or line length limits. - - For image and PDF files: Returns the file content as a base64-encoded data - structure suitable for model consumption. + - For image, audio, and PDF files: Returns the file content as a + base64-encoded data structure suitable for model consumption. - For other binary files: Attempts to identify and skip them, returning a message indicating it's a generic binary file. - **Output:** (`llmContent`): - For text files: The file content, potentially prefixed with a truncation message (e.g., `[File content truncated: showing lines 1-100 of 500 total lines...]\nActual file content...`). - - For image/PDF files: An object containing `inlineData` with `mimeType` and - base64 `data` (e.g., + - For image/audio/PDF files: An object containing `inlineData` with `mimeType` + and base64 `data` (e.g., `{ inlineData: { mimeType: 'image/png', data: 'base64encodedstring' } }`). - For other binary files: A message like `Cannot display content of binary file: /path/to/data.bin`. diff --git a/packages/core/src/tools/read-file.ts b/packages/core/src/tools/read-file.ts index 8def7e2dd6..4c0aed9565 100644 --- a/packages/core/src/tools/read-file.ts +++ b/packages/core/src/tools/read-file.ts @@ -154,7 +154,7 @@ export class ReadFileTool extends BaseDeclarativeTool< super( ReadFileTool.Name, 'ReadFile', - `Reads and returns the content of a specified file. If the file is large, the content will be truncated. The tool's response will clearly indicate if truncation has occurred and will provide details on how to read more of the file using the 'offset' and 'limit' parameters. Handles text, images (PNG, JPG, GIF, WEBP, SVG, BMP), and PDF files. For text files, it can read specific line ranges.`, + `Reads and returns the content of a specified file. If the file is large, the content will be truncated. The tool's response will clearly indicate if truncation has occurred and will provide details on how to read more of the file using the 'offset' and 'limit' parameters. Handles text, images (PNG, JPG, GIF, WEBP, SVG, BMP), audio files (MP3, WAV, AIFF, AAC, OGG, FLAC), and PDF files. For text files, it can read specific line ranges.`, Kind.Read, { properties: { diff --git a/packages/core/src/tools/read-many-files.test.ts b/packages/core/src/tools/read-many-files.test.ts index 47aa6b73d1..20a06763c2 100644 --- a/packages/core/src/tools/read-many-files.test.ts +++ b/packages/core/src/tools/read-many-files.test.ts @@ -431,7 +431,7 @@ describe('ReadManyFilesTool', () => { ).toBe(true); expect(result.returnDisplay).toContain('**Skipped 1 item(s):**'); expect(result.returnDisplay).toContain( - '- `document.pdf` (Reason: asset file (image/pdf) was not explicitly requested by name or extension)', + '- `document.pdf` (Reason: asset file (image/pdf/audio) was not explicitly requested by name or extension)', ); }); diff --git a/packages/core/src/tools/read-many-files.ts b/packages/core/src/tools/read-many-files.ts index b484c30e81..85c6c4b4aa 100644 --- a/packages/core/src/tools/read-many-files.ts +++ b/packages/core/src/tools/read-many-files.ts @@ -251,7 +251,11 @@ ${finalExclusionPatternsForDescription const fileType = await detectFileType(filePath); - if (fileType === 'image' || fileType === 'pdf') { + if ( + fileType === 'image' || + fileType === 'pdf' || + fileType === 'audio' + ) { const fileExtension = path.extname(filePath).toLowerCase(); const fileNameWithoutExtension = path.basename( filePath, @@ -269,7 +273,7 @@ ${finalExclusionPatternsForDescription filePath, relativePathForDisplay, reason: - 'asset file (image/pdf) was not explicitly requested by name or extension', + 'asset file (image/pdf/audio) was not explicitly requested by name or extension', }; } } @@ -504,7 +508,7 @@ export class ReadManyFilesTool extends BaseDeclarativeTool< super( ReadManyFilesTool.Name, 'ReadManyFiles', - `Reads content from multiple files specified by glob patterns within a configured target directory. For text files, it concatenates their content into a single string. It is primarily designed for text-based files. However, it can also process image (e.g., .png, .jpg) and PDF (.pdf) files if their file names or extensions are explicitly included in the 'include' argument. For these explicitly requested non-text files, their data is read and included in a format suitable for model consumption (e.g., base64 encoded). + `Reads content from multiple files specified by glob patterns within a configured target directory. For text files, it concatenates their content into a single string. It is primarily designed for text-based files. However, it can also process image (e.g., .png, .jpg), audio (e.g., .mp3, .wav), and PDF (.pdf) files if their file names or extensions are explicitly included in the 'include' argument. For these explicitly requested non-text files, their data is read and included in a format suitable for model consumption (e.g., base64 encoded). This tool is useful when you need to understand or analyze a collection of files, such as: - Getting an overview of a codebase or parts of it (e.g., all TypeScript files in the 'src' directory). @@ -513,7 +517,7 @@ This tool is useful when you need to understand or analyze a collection of files - Gathering context from multiple configuration files. - When the user asks to "read all files in X directory" or "show me the content of all Y files". -Use this tool when the user's query implies needing the content of several files simultaneously for context, analysis, or summarization. For text files, it uses default UTF-8 encoding and a '--- {filePath} ---' separator between file contents. The tool inserts a '--- End of content ---' after the last file. Ensure glob patterns are relative to the target directory. Glob patterns like 'src/**/*.js' are supported. Avoid using for single files if a more specific single-file reading tool is available, unless the user specifically requests to process a list containing just one file via this tool. Other binary files (not explicitly requested as image/PDF) are generally skipped. Default excludes apply to common non-text files (except for explicitly requested images/PDFs) and large dependency directories unless 'useDefaultExcludes' is false.`, +Use this tool when the user's query implies needing the content of several files simultaneously for context, analysis, or summarization. For text files, it uses default UTF-8 encoding and a '--- {filePath} ---' separator between file contents. The tool inserts a '--- End of content ---' after the last file. Ensure glob patterns are relative to the target directory. Glob patterns like 'src/**/*.js' are supported. Avoid using for single files if a more specific single-file reading tool is available, unless the user specifically requests to process a list containing just one file via this tool. Other binary files (not explicitly requested as image/audio/PDF) are generally skipped. Default excludes apply to common non-text files (except for explicitly requested images/audio/PDFs) and large dependency directories unless 'useDefaultExcludes' is false.`, Kind.Read, parameterSchema, true, // isOutputMarkdown diff --git a/packages/core/src/utils/fileUtils.test.ts b/packages/core/src/utils/fileUtils.test.ts index e0015e1051..4d51206565 100644 --- a/packages/core/src/utils/fileUtils.test.ts +++ b/packages/core/src/utils/fileUtils.test.ts @@ -49,6 +49,7 @@ describe('fileUtils', () => { let testTextFilePath: string; let testImageFilePath: string; let testPdfFilePath: string; + let testAudioFilePath: string; let testBinaryFilePath: string; let nonexistentFilePath: string; let directoryPath: string; @@ -64,6 +65,7 @@ describe('fileUtils', () => { testTextFilePath = path.join(tempRootDir, 'test.txt'); testImageFilePath = path.join(tempRootDir, 'image.png'); testPdfFilePath = path.join(tempRootDir, 'document.pdf'); + testAudioFilePath = path.join(tempRootDir, 'audio.mp3'); testBinaryFilePath = path.join(tempRootDir, 'app.exe'); nonexistentFilePath = path.join(tempRootDir, 'nonexistent.txt'); directoryPath = path.join(tempRootDir, 'subdir'); @@ -671,6 +673,8 @@ describe('fileUtils', () => { actualNodeFs.unlinkSync(testImageFilePath); if (actualNodeFs.existsSync(testPdfFilePath)) actualNodeFs.unlinkSync(testPdfFilePath); + if (actualNodeFs.existsSync(testAudioFilePath)) + actualNodeFs.unlinkSync(testAudioFilePath); if (actualNodeFs.existsSync(testBinaryFilePath)) actualNodeFs.unlinkSync(testBinaryFilePath); }); @@ -771,6 +775,28 @@ describe('fileUtils', () => { expect(result.returnDisplay).toContain('Read pdf file: document.pdf'); }); + it('should process an audio file', async () => { + const fakeMp3Data = Buffer.from('fake mp3 data'); + actualNodeFs.writeFileSync(testAudioFilePath, fakeMp3Data); + mockMimeGetType.mockReturnValue('audio/mpeg'); + const result = await processSingleFileContent( + testAudioFilePath, + tempRootDir, + new StandardFileSystemService(), + ); + expect( + (result.llmContent as { inlineData: unknown }).inlineData, + ).toBeDefined(); + expect( + (result.llmContent as { inlineData: { mimeType: string } }).inlineData + .mimeType, + ).toBe('audio/mpeg'); + expect( + (result.llmContent as { inlineData: { data: string } }).inlineData.data, + ).toBe(fakeMp3Data.toString('base64')); + expect(result.returnDisplay).toContain('Read audio file: audio.mp3'); + }); + it('should read an SVG file as text when under 1MB', async () => { const svgContent = `