
Comments (1)

sweep-ai commented on July 3, 2024

🚀 Here's the PR! #49

See Sweep's progress at the progress dashboard!
💎 Sweep Pro: I'm using GPT-4. You have unlimited GPT-4 tickets. (tracking ID: 30a61e5cf0)


Sandbox Execution ✓

Here are the sandbox execution logs prior to making any changes:

Sandbox logs for c7a522a
Checking docs/DOCS_README.md for syntax errors...
✅ docs/DOCS_README.md has no syntax errors!
1/1 ✓

Sandbox passed on the latest main, so sandbox checks will be enabled for this issue.


Step 1: 🔎 Searching

I found the following snippets in your repository. I will now analyze these snippets and come up with a plan.

Some code snippets I think are relevant, in decreasing order of relevance. If some file is missing from here, you can mention the path in the ticket description.

@classmethod
def prepare_by_seed(cls, train_seeds=[1, 2, 3, 4, 5], train_size=16, dev_size=1000,
                    divide_eval_per_seed=True, eval_seed=2023, **kwargs):
    # dotdict is the attribute-access dict helper from dsp.utils.
    data_args = dotdict(train_size=train_size, eval_seed=eval_seed, dev_size=dev_size, test_size=0, **kwargs)
    dataset = cls(**data_args)
    eval_set = dataset.dev
    eval_sets, train_sets = [], []

    examples_per_seed = dev_size // len(train_seeds) if divide_eval_per_seed else dev_size
    eval_offset = 0

    for train_seed in train_seeds:
        data_args.train_seed = train_seed
        dataset.reset_seeds(**data_args)

        # Each seed gets its own train set and (optionally disjoint) eval slice.
        eval_sets.append(eval_set[eval_offset:eval_offset + examples_per_seed])
        train_sets.append(dataset.train)

        assert len(eval_sets[-1]) == examples_per_seed, len(eval_sets[-1])
        assert len(train_sets[-1]) == train_size, len(train_sets[-1])

        if divide_eval_per_seed:
            eval_offset += examples_per_seed
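
To make the eval-slicing arithmetic above concrete, here is a small worked example using the snippet's default arguments (the numbers are illustrative, not taken from the repository):

    # With the defaults train_seeds=[1, 2, 3, 4, 5] and dev_size=1000:
    train_seeds = [1, 2, 3, 4, 5]
    dev_size = 1000
    examples_per_seed = dev_size // len(train_seeds)  # 200 when divide_eval_per_seed=True

    # Seed number k (0-indexed) then evaluates on the disjoint slice
    # eval_set[200 * k : 200 * (k + 1)], while its train set is re-sampled
    # via dataset.reset_seeds(...). With divide_eval_per_seed=False, every
    # seed instead shares the same first dev_size examples.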

# Imports as in dspy/evaluate/evaluate.py; merge_dicts, truncate_cell, and
# configure_dataframe_display are helpers defined alongside this class.
import threading
from concurrent.futures import ThreadPoolExecutor, as_completed

import pandas as pd
import tqdm
from IPython.display import HTML
from IPython.display import display as ipython_display

import dsp


class Evaluate:
    def __init__(self, *, devset, metric=None, num_threads=1, display_progress=False,
                 display_table=False, display=True, max_errors=5):
        self.devset = devset
        self.metric = metric
        self.num_threads = num_threads
        self.display_progress = display_progress
        self.display_table = display_table
        self.display = display
        self.max_errors = max_errors
        self.error_count = 0
        self.error_lock = threading.Lock()

    def _execute_single_thread(self, wrapped_program, devset, display_progress):
        ncorrect = 0
        ntotal = 0
        reordered_devset = []

        pbar = tqdm.tqdm(total=len(devset), dynamic_ncols=True, disable=not display_progress)
        for idx, arg in devset:
            example_idx, example, prediction, score = wrapped_program(idx, arg)
            reordered_devset.append((example_idx, example, prediction, score))
            ncorrect += score
            ntotal += 1
            self._update_progress(pbar, ncorrect, ntotal)
        pbar.close()

        return reordered_devset, ncorrect, ntotal

    def _execute_multi_thread(self, wrapped_program, devset, num_threads, display_progress):
        ncorrect = 0
        ntotal = 0
        reordered_devset = []

        with ThreadPoolExecutor(max_workers=num_threads) as executor:
            futures = {executor.submit(wrapped_program, idx, arg) for idx, arg in devset}
            pbar = tqdm.tqdm(total=len(devset), dynamic_ncols=True, disable=not display_progress)

            for future in as_completed(futures):
                example_idx, example, prediction, score = future.result()
                reordered_devset.append((example_idx, example, prediction, score))
                ncorrect += score
                ntotal += 1
                self._update_progress(pbar, ncorrect, ntotal)
            pbar.close()

        return reordered_devset, ncorrect, ntotal

    def _update_progress(self, pbar, ncorrect, ntotal):
        pbar.set_description(f"Average Metric: {ncorrect} / {ntotal} ({round(100 * ncorrect / ntotal, 1)})")
        pbar.update()

    def __call__(self, program, metric=None, devset=None, num_threads=None,
                 display_progress=None, display_table=None, display=None,
                 return_all_scores=False):
        # Fall back to the values supplied at construction time.
        metric = metric if metric is not None else self.metric
        devset = devset if devset is not None else self.devset
        num_threads = num_threads if num_threads is not None else self.num_threads
        display_progress = display_progress if display_progress is not None else self.display_progress
        display_table = display_table if display_table is not None else self.display_table
        display = self.display if display is None else display

        display_progress = display_progress and display
        display_table = display_table if display else False

        def wrapped_program(example_idx, example):
            # NOTE: TODO: Won't work if threads create threads!
            creating_new_thread = threading.get_ident() not in dsp.settings.stack_by_thread
            if creating_new_thread:
                dsp.settings.stack_by_thread[threading.get_ident()] = list(dsp.settings.main_stack)

            try:
                prediction = program(**example.inputs())
                score = metric(example, prediction)  # FIXME: TODO: What's the right order? Maybe force name-based kwargs!
                return example_idx, example, prediction, score
            except Exception as e:
                with self.error_lock:
                    self.error_count += 1
                    current_error_count = self.error_count
                if current_error_count >= self.max_errors:
                    raise e
                print(f"Error for example in dev set: \t\t {e}")
                return example_idx, example, dict(), 0.0
            finally:
                if creating_new_thread:
                    del dsp.settings.stack_by_thread[threading.get_ident()]

        devset = list(enumerate(devset))

        if num_threads == 1:
            reordered_devset, ncorrect, ntotal = self._execute_single_thread(wrapped_program, devset, display_progress)
        else:
            reordered_devset, ncorrect, ntotal = self._execute_multi_thread(wrapped_program, devset, num_threads, display_progress)

        if display:
            print(f"Average Metric: {ncorrect} / {ntotal} ({round(100 * ncorrect / ntotal, 1)}%)")

        # Restore original dev-set order (threads may complete out of order).
        predicted_devset = sorted(reordered_devset)

        data = [merge_dicts(example, prediction) | {'correct': score} for _, example, prediction, score in predicted_devset]

        df = pd.DataFrame(data)

        # Truncate every cell in the DataFrame
        df = df.applymap(truncate_cell)

        # Rename the 'correct' column to the name of the metric function
        metric_name = metric.__name__
        df.rename(columns={'correct': metric_name}, inplace=True)

        if display_table:
            if isinstance(display_table, int):
                df_to_display = df.head(display_table).copy()
                truncated_rows = len(df) - display_table
            else:
                df_to_display = df.copy()
                truncated_rows = 0

            styled_df = configure_dataframe_display(df_to_display, metric_name)
            ipython_display(styled_df)

            if truncated_rows > 0:
                # Simplified message about the truncated rows
                message = f"""
                <div style='
                    text-align: center;
                    font-size: 16px;
                    font-weight: bold;
                    color: #555;
                    margin: 10px 0;'>
                    ... {truncated_rows} more rows not displayed ...
                </div>
                """
                ipython_display(HTML(message))

        if return_all_scores:
            return round(100 * ncorrect / ntotal, 2), [score for *_, score in predicted_devset]
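
For the documentation requested in this ticket, a short usage sketch could sit alongside the class reference. The following is a minimal, self-contained sketch, assuming Evaluate is importable from dspy.evaluate as in the file quoted above; the toy dev set, metric, and program are hypothetical stand-ins, not names from the repository:

    import dspy
    from dspy.evaluate import Evaluate  # assumed import path, per dspy/evaluate/evaluate.py

    # Hypothetical toy dev set; with_inputs marks which fields are program inputs.
    devset = [
        dspy.Example(question="2 + 2?", answer="4").with_inputs("question"),
        dspy.Example(question="Capital of France?", answer="Paris").with_inputs("question"),
    ]

    def exact_match(example, prediction):
        # Hypothetical metric: 1.0 when the predicted answer matches the gold one.
        return float(example.answer == prediction.answer)

    def program(question):
        # Stand-in "program": any callable accepting the example's inputs works.
        return dspy.Prediction(answer="4" if "2 + 2" in question else "Paris")

    evaluate = Evaluate(devset=devset, metric=exact_match, num_threads=1,
                        display_progress=True)

    # Per the snippet above, return_all_scores=True yields the average score
    # (as a percentage) together with the per-example scores.
    average, scores = evaluate(program, return_all_scores=True)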

dspy/README.md

Lines 15 to 25 in c7a522a

**DSPy** is the framework for solving advanced tasks with language models (LMs) and retrieval models (RMs). **DSPy** unifies techniques for **prompting** and **fine-tuning** LMs — and approaches for **reasoning**, **self-improvement**, and **augmentation with retrieval and tools**. All of these are expressed through modules that compose and learn.
To make this possible:
- **DSPy** provides **composable and declarative modules** for instructing LMs in a familiar Pythonic syntax. It upgrades "prompting techniques" like chain-of-thought and self-reflection from hand-adapted _string manipulation tricks_ into truly modular _generalized operations that learn to adapt to your task_.
- **DSPy** introduces an **automatic compiler that teaches LMs** how to conduct the declarative steps in your program. Specifically, the **DSPy compiler** will internally _trace_ your program and then **craft high-quality prompts for large LMs (or train automatic finetunes for small LMs)** to teach them the steps of your task.
The **DSPy compiler** _bootstraps_ prompts and finetunes from minimal data **without needing manual labels for the intermediate steps** in your program. Instead of brittle "prompt engineering" with hacky string manipulation, you can explore a systematic space of modular and trainable pieces.
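
As a taste of the Pythonic syntax described above, a declarative module can be built from a one-line signature. This is a minimal sketch, not code from the repository; it assumes an OpenAI-backed LM configured through dspy.settings, with an illustrative model name:

    import dspy

    # Configure an LM first; dspy.OpenAI is the wrapper this generation of the
    # library ships (the model name is only an example).
    dspy.settings.configure(lm=dspy.OpenAI(model="gpt-3.5-turbo"))

    # "question -> answer" declares the module's input and output fields; the
    # actual prompt is produced by the library (and can be tuned by the
    # compiler), not hand-written.
    qa = dspy.ChainOfThought("question -> answer")

    prediction = qa(question="What does the DSPy compiler bootstrap from minimal data?")
    print(prediction.answer)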


Step 2: ⌨️ Coding

Create docs/evaluate.md with contents:
• Start by creating a new markdown file named `evaluate.md` in the `docs` folder.
• Begin the documentation by providing a brief introduction to the `evaluate` module, its purpose, and its importance in the context of the DSPy framework. This information can be drawn from the README.md file.
• Describe the `Evaluate` class, its methods, and their functionalities. Make sure to cover all the methods including `__init__`, `_execute_single_thread`, `_execute_multi_thread`, `_update_progress`, and `__call__`.
• Explain the parameters of each method and their expected values.
• Include code snippets from the `dspy/evaluate/evaluate.py` file to illustrate the usage of the `Evaluate` class and its methods.
• Provide examples of how to use the `evaluate` module. These examples can be drawn from the jupyter notebooks.
• Conclude the documentation by summarizing the key points and highlighting the benefits of using the `evaluate` module in the DSPy framework.
  • Running GitHub Actions for docs/evaluate.md
Check docs/evaluate.md with contents:

Ran GitHub Actions for 04ec956b1d46eced483cbb0acd5375c171a360cd:

Modify docs/DOCS_README.md with contents:
• Add a reference to the `evaluate.md` file in the `DOCS_README.md` file.
• This can be done by adding a new line under the relevant section (e.g., "Module Documentation") with the text "- [Evaluate Module](evaluate.md): Documentation for the `evaluate` module in the `dspy` folder."
--- 
+++ 
@@ -9,6 +9,10 @@
 making changes to it and make contributions. DSPy is made by the thriving community
 behind it, and you're always welcome to make contributions to the project and the
 documentation.
+
+## Module Documentation
+
+- [Evaluate Module](evaluate.md): Documentation for the `evaluate` module in the `dspy` folder.
 
 ## Build Docs
 
  • Running GitHub Actions for docs/DOCS_README.md
Check docs/DOCS_README.md with contents:

Ran GitHub Actions for edc9a64d16aab719faae486c04b89bab176a2e43:


Step 3: 🔁 Code Review

I have finished reviewing the code for completeness. I did not find errors for sweep/ensure_evaluate_in_the_dspy_folder_has_d.


🎉 Latest improvements to Sweep:

  • We just released a dashboard to track Sweep's progress on your issue in real-time, showing every stage of the process – from search to planning and coding.
  • Sweep uses OpenAI's latest Assistant API to plan code changes and modify code! This is 3x faster and significantly more reliable as it allows Sweep to edit code and validate the changes in tight iterations, the same way as a human would.
  • Try using the GitHub issues extension to create Sweep issues directly from your editor!

💡 To recreate the pull request edit the issue title or description. To tweak the pull request, leave a comment on the pull request.
Join Our Discord
