Skip to content

Commit 45a6804

Browse files
committed
Add --xml flag
Closes #15
1 parent 4810ef7 commit 45a6804

File tree

3 files changed

+102
-9
lines changed

3 files changed

+102
-9
lines changed

README.md

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@ For background on this project see [Building files-to-prompt entirely using Clau
1212
## Installation
1313

1414
Install this tool using `pip`:
15+
1516
```bash
1617
pip install files-to-prompt
1718
```
@@ -29,11 +30,13 @@ This will output the contents of every file, with each file preceded by its rela
2930
### Options
3031

3132
- `--include-hidden`: Include files and folders starting with `.` (hidden files and directories).
33+
3234
```bash
3335
files-to-prompt path/to/directory --include-hidden
3436
```
3537

3638
- `--ignore-gitignore`: Ignore `.gitignore` files and include all files.
39+
3740
```bash
3841
files-to-prompt path/to/directory --ignore-gitignore
3942
```
@@ -101,6 +104,26 @@ Contents of file3.txt
101104
---
102105
```
103106

107+
### XML Output
108+
109+
Anthropic has provided [specific guidelines](https://docs.anthropic.com/claude/docs/long-context-window-tips) for optimally structuring prompts to take advantage of Claude's extended context window.
110+
111+
To structure the output in this way, use the optional `--xml` flag, which will produce output like this:
112+
113+
```xml
114+
Here are some documents for you to reference for your task:
115+
116+
<documents>
<document path="my_directory/file1.txt">
117+
Contents of file1.txt
118+
</document>
119+
120+
<document path="my_directory/file2.txt">
121+
Contents of file2.txt
122+
</document>
123+
...
124+
</documents>
125+
```
126+
104127
## Development
105128

106129
To contribute to this tool, first check out the code. Then create a new virtual environment:
@@ -118,6 +141,7 @@ pip install -e '.[test]'
118141
```
119142

120143
To run the tests:
144+
121145
```bash
122146
pytest
123147
```

files_to_prompt/cli.py

Lines changed: 55 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,8 @@
11
import os
2-
import click
32
from fnmatch import fnmatch
43

4+
import click
5+
56

67
def should_ignore(path, gitignore_rules):
78
for rule in gitignore_rules:
@@ -23,7 +24,12 @@ def read_gitignore(path):
2324

2425

2526
def process_path(
26-
path, include_hidden, ignore_gitignore, gitignore_rules, ignore_patterns
27+
path,
28+
include_hidden,
29+
ignore_gitignore,
30+
gitignore_rules,
31+
ignore_patterns,
32+
xml,
2733
):
2834
if os.path.isfile(path):
2935
try:
@@ -69,11 +75,16 @@ def process_path(
6975
with open(file_path, "r") as f:
7076
file_contents = f.read()
7177

72-
click.echo(file_path)
73-
click.echo("---")
74-
click.echo(file_contents)
75-
click.echo()
76-
click.echo("---")
78+
if xml:
79+
click.echo(f'<document path="{file_path}">')
80+
click.echo(file_contents)
81+
click.echo("</document>")
82+
else:
83+
click.echo(file_path)
84+
click.echo("---")
85+
click.echo(file_contents)
86+
click.echo()
87+
click.echo("---")
7788
except UnicodeDecodeError:
7889
warning_message = (
7990
f"Warning: Skipping file {file_path} due to UnicodeDecodeError"
@@ -100,8 +111,13 @@ def process_path(
100111
default=[],
101112
help="List of patterns to ignore",
102113
)
114+
@click.option(
115+
"--xml",
116+
is_flag=True,
117+
help="Output in XML format suitable for Claude's long context window.",
118+
)
103119
@click.version_option()
104-
def cli(paths, include_hidden, ignore_gitignore, ignore_patterns):
120+
def cli(paths, include_hidden, ignore_gitignore, ignore_patterns, xml):
105121
"""
106122
Takes one or more paths to files or directories and outputs every file,
107123
recursively, each one preceded with its filename like this:
@@ -114,13 +130,43 @@ def cli(paths, include_hidden, ignore_gitignore, ignore_patterns):
114130
path/to/file2.py
115131
---
116132
...
133+
134+
If the `--xml` flag is provided, the output will be structured as follows:
135+
136+
Here are some documents for you to reference for your task:
137+
138+
<documents>
139+
<document path="path/to/file1.txt">
140+
Contents of file1.txt
141+
</document>
142+
143+
<document path="path/to/file2.txt">
144+
Contents of file2.txt
145+
</document>
146+
...
147+
</documents>
117148
"""
118149
gitignore_rules = []
119150
for path in paths:
120151
if not os.path.exists(path):
121152
raise click.BadArgumentUsage(f"Path does not exist: {path}")
122153
if not ignore_gitignore:
123154
gitignore_rules.extend(read_gitignore(os.path.dirname(path)))
155+
if xml and path == paths[0]:
156+
click.echo("""
157+
Here are some documents for you to reference for your task:
158+
159+
<documents>
160+
""")
161+
124162
process_path(
125-
path, include_hidden, ignore_gitignore, gitignore_rules, ignore_patterns
163+
path,
164+
include_hidden,
165+
ignore_gitignore,
166+
gitignore_rules,
167+
ignore_patterns,
168+
xml,
126169
)
170+
171+
if xml:
172+
click.echo("</documents>")

tests/test_files_to_prompt.py

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,7 @@
11
import os
2+
23
from click.testing import CliRunner
4+
35
from files_to_prompt.cli import cli
46

57

@@ -186,3 +188,24 @@ def test_binary_file_warning(tmpdir):
186188
"Warning: Skipping file test_dir/binary_file.bin due to UnicodeDecodeError"
187189
in stderr
188190
)
191+
192+
193+
def test_xml_format(tmpdir):
194+
runner = CliRunner()
195+
with tmpdir.as_cwd():
196+
os.makedirs("test_dir")
197+
with open("test_dir/file1.txt", "w") as f:
198+
f.write("Contents of file1")
199+
with open("test_dir/file2.txt", "w") as f:
200+
f.write("Contents of file2")
201+
202+
result = runner.invoke(cli, ["test_dir", "--xml"])
203+
assert result.exit_code == 0
204+
assert "<documents>" in result.output
205+
assert '<document path="test_dir/file1.txt">' in result.output
206+
assert "Contents of file1" in result.output
207+
assert "</document>" in result.output
208+
assert '<document path="test_dir/file2.txt">' in result.output
209+
assert "Contents of file2" in result.output
210+
assert "</document>" in result.output
211+
assert "</documents>" in result.output

0 commit comments

Comments
 (0)