# -- fastcore.tools --
# Implementation of Anthropic file and text editor tools
def rg(
argstr:str, # All args to the command, will be split with shlex
disallow_re:str=None, # optional regex which, if matched on argstr, will disallow the command
allow_re:str=None # optional regex which, if not matched on argstr, will disallow the command
):
"Run the `rg` command with the args in `argstr`"
def sed(
argstr:str, # All args to the command, will be split with shlex
disallow_re:str=None, # optional regex which, if matched on argstr, will disallow the command
allow_re:str=None # optional regex which, if not matched on argstr, will disallow the command
):
"Run the `sed` command with the args in `argstr` (e.g for reading a section of a file)"
def view(
path:str, # Path to directory or file to view
view_range:tuple[int,int]=None, # Optional 1-indexed (start, end) line range for files, end=-1 for EOF. Do NOT use unless it's known that the file is too big to keep in context—simply view the WHOLE file when possible
nums:bool=False, # Whether to show line numbers
skip_folders:tuple[str,...]=('_proc','__pycache__') # Folder names to skip when listing directories
):
'View directory or file contents with optional line range and numbers'
def create(
path: str, # Path where the new file should be created
file_text: str, # Content to write to the file
overwrite:bool=False # Whether to overwrite existing files
) -> str:
'Creates a new file with the given content at the specified path'
def insert(
path: str, # Path to the file to modify
insert_line: int, # Line number where to insert (0-based indexing)
new_str: str # Text to insert at the specified line
) -> str:
'Insert new_str at specified line number'
def str_replace(
path: str, # Path to the file to modify
old_str: str, # Text to find and replace
new_str: str # Text to replace with
) -> str:
'Replace first occurrence of old_str with new_str in file'
def strs_replace(
    path:str, # Path to the file to modify
    old_strs:list[str], # List of strings to find and replace
    new_strs:list[str], # List of replacement strings (must match length of old_strs)
):
    "Replace for each str pair in old_strs,new_strs"
    # The source listing was truncated mid-comprehension; completed with the natural
    # pairwise zip over the two lists, delegating each replacement to `str_replace`.
    if len(old_strs) != len(new_strs):
        raise ValueError("old_strs and new_strs must have the same length")
    res = [str_replace(path, old, new) for (old,new) in zip(old_strs, new_strs)]
    return res
def replace_lines(
path:str, # Path to the file to modify
start_line:int, # Starting line number to replace (1-based indexing)
end_line:int, # Ending line number to replace (1-based indexing, inclusive)
new_content:str, # New content to replace the specified lines
):
"Replace lines in file using start and end line-numbers (index starting at 1)"
def move_lines(
    path: str, # Path to the file to modify
    start_line: int, # Starting line number to move (1-based)
    end_line: int, # Ending line number to move (1-based, inclusive)
    dest_line: int, # Destination line number (1-based, where lines will be inserted before)
) -> str:
    "Move lines from start_line:end_line to before dest_line"
# NOTE(review): extraction residue was fused onto the docstring line above (a syntax error);
# preserved here verbatim as comments rather than deleted:
# Answerai tools study
# Summary: answerai tools signatures
# -- claudette.text_editor --
# Other implementation of Anthropic text editor tools
def view(path:str, # The path to the file or directory to view
view_range:tuple[int,int]=None, # Optional array of two integers specifying the start and end line numbers to view. Line numbers are 1-indexed, and -1 for the end line means read to the end of the file. This parameter only applies when viewing files, not directories.
nums:bool=False # Optionally prefix all lines of the file with a line number
) -> str:
'Examine the contents of a file or list the contents of a directory. It can read the entire file or a specific range of lines. With or without line numbers.'
def create(path: str, # The path where the new file should be created
file_text: str, # The text content to write to the new file
overwrite:bool=False # Allows overwriting an existing file
) -> str:
'Creates a new file with the given text content at the specified path'
def insert(path: str, # The path to the file to modify
insert_line: int, # The line number after which to insert the text (0 for beginning of file)
new_str: str # The text to insert
) -> str:
'Insert text at a specific line number in a file.'
def str_replace(path: str, # The path to the file to modify
old_str: str, # The text to replace (must match exactly, including whitespace and indentation)
new_str: str # The new text to insert in place of the old text
) -> str:
'Replace a specific string in a file with a new string. This is used for making precise edits.'# -- ipykernelhelper --
# Get the html content of a web page using the cloudscraper library to bypass Cloudflare's anti-bot page
# and convert it to markdown prepared for insertion in the LLM context
def read_url(url: str, as_md: bool = True,
extract_section: bool = True, selector: str = None,
math_mode: str = None):
"""This functions extracts a web page information for LLM ingestion
1. Downloads a web page
2. Parses HTML
3. Optionally extracts a specific section (fragment or CSS selector)
4. Converts MathML → LaTeX
5. Optionally converts HTML → Markdown
6. Convert code sections to fenced markdown blocks
7. Makes image URLs absolute
8. Returns the processed text
"""# -- dialoghelper.core --
# Tools to edit the Jupyter notebook
def dialoghelper_explain_dialog_editing(
)->str: # Detailed documention on dialoghelper dialog editing
"Call this to get a detailed explanation of how dialog editing is done in dialoghelper. Always use if doing anything non-trivial, or if dialog editing has not previously occured in this session"
return """# dialoghelper dialog editing functionality
This guide consolidates understanding of how dialoghelper tools work together. Individual tool schemas are already in context—this adds architectural insight and usage patterns.
## Core Concepts
- **Dialog addressing**: All functions accepting `dname` resolve paths relative to current dialog (no leading `/`) or absolute from Solveit's runtime data path (with leading `/`). The `.ipynb` extension is never included.
- **Message addressing**: Messages have stable `id` strings (e.g., `_a9cb5512`). The current executing message's id is in `__msg_id`. Tools use `id` for targeting; `find_msg_id()` retrieves current.
- **Implicit state**: After `add_msg`/`update_msg`, `__msg_id` is updated to the new/modified message. This enables chaining: successive `add_msg` calls create messages in sequence.
## Tool Workflow Patterns
### Reading dialog state
- `view_dlg` — fastest way to see entire dialog structure with line numbers for editing
- `find_msgs` — search with regex, filter by type/errors/changes
- `read_msg` — navigate relative to current message
- `read_msgid` — direct access when you have the id
**Key insight**: Messages above the current prompt are already in LLM context. Use read tools only for: (1) getting line numbers for edits, (2) accessing messages below current prompt, (3) accessing other dialogs.
### Modifying dialogs
- `add_msg` — placement can be `add_after`/`add_before` (relative to current) or `at_start`/`at_end` (absolute)
- **NB** When not passing a message id, it defaults to the *current* message. So if you call it multiple times with no message id, the messages will be added in REVERSE! Instead, get the return value of `add_msg` after each call, and use that for the next call
- `update_msg` — partial updates; only pass fields to change
- `del_msg` — use sparingly, only when explicitly requested
`copy_msg` → `paste_msg` — for moving/duplicating messages within running dialogs.
## Non-decorated Functions Worth Knowing
There are additional functions available that can be added to fenced blocks, or the user may add as tools; they are not included in schemas by default.
**Browser integration:**
- `add_html(content)` — inject HTML with `hx-swap-oob` into live browser DOM
- `iife(code)` — execute JavaScript immediately in browser
- `fire_event(evt, data)` / `event_get(evt)` — trigger/await browser events
**Content helpers:**
- `url2note(url, ...)` — fetch URL as markdown, add as note message
- `mermaid(code)` / `enable_mermaid()` — render mermaid diagrams
- `add_styles(s)` — apply solveit's MonsterUI styling to HTML
**Dangerous (not exposed by default):**
- `_add_msg_unsafe(content, run=True, ...)` — add AND execute message (code or prompt)
- `run_msg(ids)` — queue messages for execution
- `rm_dialog(name)` — delete entire dialog
## Important Patterns
### Key Principles
1. **Always re-read before editing.** Past tool call results in chat history are TRUNCATED. Never rely on line numbers from earlier in the conversation—call `read_msgid(id, nums=True)` immediately before any edit operation.
2. **Work backwards.** When making multiple edits to a message, start from the end and work towards the beginning. This prevents line number shifts from invalidating your planned edits.
3. **Don't guess when tools fail.** If a tool call returns an error, STOP and ask for clarification. Do not retry with guessed parameters.
4. **Verify after complex edits.** After significant changes, re-read the affected region to confirm the edit worked as expected before proceeding.
### Typical Workflow
```
1. read_msgid(id, nums=True) # Get current state with line numbers
2. Identify lines to change
3. msg_replace_lines(...) or msg_str_replace(...) # Make edit
4. If more edits needed: re-read, then repeat from step 2
```
### Tool Selection
- **`msg_replace_lines`**: Best for replacing/inserting contiguous blocks. Use `view_range` on read to focus on the area.
- **`msg_str_replace`**: Best for targeted single small string replacements when you know the exact text.
- **`msg_strs_replace`**: Best for multiple small independent replacements in one call.
- **`msg_insert_line`**: Best for adding new content without replacing existing lines.
- **`msg_del_lines`**: Best for removing content.
**Rough rule of thumb:** Prefer `msg_replace_lines` over `msg_str(s)_replace` unless there's >1 match to change or it's just a word or two. Use the insert/delete functions for inserting/deleting; don't use `msg_str(s)_replace` for that.
### Common Mistakes to Avoid
- Using line numbers from a truncated earlier result
- Making multiple edits without re-reading between them
- Guessing line numbers when a view_range was truncated
- Always call `read_msgid(id, nums=True)` first to get accurate line numbers
- String-based tools (`msg_str_replace`, `msg_strs_replace`) fail if the search string appears zero or multiple times—use exact unique substrings."""
def curr_dialog(
with_messages:bool=False, # Include messages as well?
dname:str='' # Dialog to get info for; defaults to current dialog
):
"Get the current dialog info."
def view_dlg(
dname:str='', # Dialog to get info for; defaults to current dialog
msg_type:str=None, # optional limit by message type ('code', 'note', or 'prompt')
nums:bool=False, # Whether to show line numbers
include_output:bool=False, # Include output in returned dict?
trunc_out:bool=True, # Middle-out truncate code output to 100 characters (only applies if `include_output`)?
trunc_in:bool=False, # Middle-out truncate cell content to 80 characters?
):
"Concise XML view of all messages (optionally filtered by type), not including metadata. Often it is more efficient to call this to see the whole dialog at once (including line numbers if needed), instead of running `find_msgs` or `read_msg` multiple times."
return find_msgs(msg_type=msg_type, dname=dname, as_xml=True, nums=nums,
include_meta=False, include_output=include_output, trunc_out=trunc_out, trunc_in=trunc_in)
def msg_idx(
id:str=None, # Message id to find (defaults to current message)
dname:str='' # Dialog to get message index from; defaults to current dialog
):
"Get absolute index of message in dialog."
def find_msgs(
re_pattern:str='', # Optional regex to search for (re.DOTALL+re.MULTILINE is used)
msg_type:str=None, # optional limit by message type ('code', 'note', or 'prompt')
use_case:bool=False, # Use case-sensitive matching?
use_regex:bool=True, # Use regex matching?
only_err:bool=False, # Only return messages that have errors?
only_exp:bool=False, # Only return messages that are exported?
only_chg:bool=False, # Only return messages that have changed vs git HEAD?
ids:str='', # Optionally filter by comma-separated list of message ids
limit:int=None, # Optionally limit number of returned items
include_output:bool=True, # Include output in returned dict?
include_meta:bool=True, # Include all additional message metadata
as_xml:bool=False, # Use concise unescaped XML output format
nums:bool=False, # Show line numbers?
trunc_out:bool=False, # Middle-out truncate code output to 100 characters?
trunc_in:bool=False, # Middle-out truncate cell content to 80 characters?
headers_only:bool=False, # Only return note messages that are headers (first line only); cannot be used together with `header_section`
header_section:str=None, # Find section starting with this header; returns it plus all children (i.e until next header of equal or more significant level)
dname:str='' # Dialog to get info for; defaults to current dialog
)->list[dict]: # Messages in requested dialog that contain the given information
"""Often it is more efficient to call `view_dlg` to see the whole dialog at once, so you can use it all from then on, instead of using `find_msgs`.
{dname}
Message ids are identical to those in LLM chat history, so do NOT call this to view a specific message if it's in the chat history--instead use `read_msgid`.
Do NOT use find_msgs to view message content in the current dialog above the current prompt -- these are *already* provided in LLM context, so just read the content there directly. (NB: LLM context only includes messages *above* the current prompt, whereas `find_msgs` can access *all* messages.)
To refer to a found message from code or tools, use its `id` field."""
def read_msg(
n:int=-1, # Message index (if relative, +ve is downwards)
relative:bool=True, # Is `n` relative to current message (True) or absolute (False)?
id:str=None, # Message id to find (defaults to current message)
view_range:list[int,int]=None, # Optional 1-indexed (start, end) line range for files, end=-1 for EOF
nums:bool=False, # Whether to show line numbers
dname:str='' # Dialog to get info for; defaults to current dialog
):
"""Get the message indexed in the current dialog.
NB: Messages in the current dialog above the current message are *already* visible; use this only when you need line numbers for editing operations, or for messages not in the current dialog or below the current message.
- To get the exact message use `n=0` and `relative=True` together with `id`.
- To get a relative message use `n` (relative position index).
- To get the nth message use `n` with `relative=False`, e.g `n=0` first message, `n=-1` last message.
{dname}"""
def read_msgid(
    id:str, # Message id to find
    view_range:list[int,int]=None, # Optional 1-indexed (start, end) line range, end=-1 for EOF
    nums:bool=False, # Whether to show line numbers
    dname:str='' # Dialog to get message from; defaults to current dialog
):
    """Get message `id`. Message IDs can be viewed directly in LLM chat history/context, or found in `find_msgs` results."""
def add_msg(
content:str, # Content of the message (i.e the message prompt, code, or note text)
placement:str='add_after', # Can be 'at_start' or 'at_end', and for default dname can also be 'add_after' or 'add_before'
id:str=None, # id of message that placement is relative to (if None, uses current message; note: each add_msg updates "current" to the newly created message)
msg_type: str='note', # Message type, can be 'code', 'note', or 'prompt'
output:str='', # Prompt/code output; Code outputs must be .ipynb-compatible JSON array
time_run: str | None = '', # When was message executed
is_exported: int | None = 0, # Export message to a module?
skipped: int | None = 0, # Hide message from prompt?
i_collapsed: int | None = 0, # Collapse input?
o_collapsed: int | None = 0, # Collapse output?
heading_collapsed: int | None = 0, # Collapse heading section?
pinned: int | None = 0, # Pin to context?
dname:str='' # Dialog to get info for; defaults to current dialog. If passed, provide `id` or use `placement='at_start'`/`'at_end'`
)->str: # Message ID of newly created message
"""Add/update a message to the queue to show after code execution completes.
**NB**: when creating multiple messages in a row, after the 1st message set `id` to the result of the last `add_msg` call,
otherwise messages will appear in the dialog in REVERSE order.
{dname}"""
def del_msg(
id:str=None, # id of message to delete
dname:str='', # Dialog to get info for; defaults to current dialog
log_changed:bool=False # Add a note showing the deleted content?
):
"Delete a message from the dialog. DO NOT USE THIS unless you have been explicitly instructed to delete messages."
def update_msg(
id:str=None, # id of message to update (if None, uses current message)
msg:Optional[Dict]=None, # Dictionary of field keys/values to update
dname:str='', # Dialog to get info for; defaults to current dialog
log_changed:bool=False, # Add a note showing the diff?
**kwargs):
"""Update an existing message. Provide either `msg` OR field key/values to update.
- Use `content` param to update contents.
- Only include parameters to update--missing ones will be left unchanged.
{dname}"""
def run_msg(
ids:str=None, # Comma-separated ids of message(s) to execute
dname:str='' # Running dialog to get info for; defaults to current dialog. (Note dialog *must* be running for this function)
):
"Adds a message to the run queue. Use read_msg to see the output once it runs."
def copy_msg(
ids:str=None, # Comma-separated ids of message(s) to copy
cut:bool=False, # Cut message(s)? (If not, copies)
dname:str='' # Running dialog to copy messages from; defaults to current dialog. (Note dialog *must* be running for this function)
):
"Add `ids` to clipboard."
def paste_msg(
id:str=None, # Message id to paste next to
after:bool=True, # Paste after id? (If not, pastes before)
dname:str='' # Running dialog to copy messages from; defaults to current dialog. (Note dialog *must* be running for this function)
):
"Paste clipboard msg(s) after/before the current selected msg (id)."
def run_code_interactive(
code:str # Code to have user run
):
"""Insert code into user's dialog and request for the user to run it. Use other tools where possible,
but if they can not find needed information, *ALWAYS* use this instead of guessing or giving up.
IMPORTANT: This tool is TERMINAL - after calling it, you MUST stop all tool usage
and wait for user response. Never call additional tools after this one."""
def msg_insert_line(text, insert_line:int, new_str:str):
"Insert text at specific line num in message. {besure}\n{dname}"
def msg_str_replace(text, old_str:str, new_str:str):
"Replace first occurrence of old_str with new_str in a message.\n{dname}"
def msg_strs_replace(text, old_strs:list[str], new_strs:list[str]):
"Replace multiple strings simultaneously in a message.\n{dname}"
def msg_replace_lines(text, start_line:int, end_line:int=None, new_content:str=''):
"Replace line range in msg with new content. {besure}\n{dname}"
def msg_del_lines(text, start_line:int, end_line:int=None):
"Delete line range from a message. {besure}\n{dname}"# -- dialoghelper.core --
# Import context and code
def url2note(
url:str, # URL to read
extract_section:bool=True, # If url has an anchor, return only that section
selector:str=None, # Select section(s) using BeautifulSoup.select (overrides extract_section)
ai_img:bool=True, # Make images visible to the AI
split_re:str='' # Regex to split content into multiple notes, set to '' for single note
):
"Read URL as markdown, and add note(s) below current message with the result"
def ctx_folder(
path:Path='.', # Path to collect
types:str|list='py,doc', # list or comma-separated str of ext types from: py, js, java, c, cpp, rb, r, ex, sh, web, doc, cfg
out=False, # Include notebook cell outputs?
raw=True, # Add raw message, or note?
exts:str|list=None, # list or comma-separated str of exts to include (overrides `types`)
**kwargs
):
"Convert folder to XML context and place in a new message"
def ctx_repo(
owner:str, # GitHub repo owner
repo:str, # GitHub repo name
types:str|list='py,doc', # list or comma-separated str of ext types from: py, js, java, c, cpp, rb, r, ex, sh, web, doc, cfg
exts:str|list=None, # list or comma-separated str of exts to include (overrides `types`)
out=False, # Include notebook cell outputs?
raw=True, # Add raw message, or note?
**kwargs
):
"Convert GitHub repo to XML context and place in a new message"
def ctx_symfile(sym):
"Add note with filepath and contents for a symbol's source file"
# NOTE(review): exact duplicate of the ctx_symfile definition immediately above —
# confirm against the real source and drop one copy.
def ctx_symfile(sym):
    "Add note with filepath and contents for a symbol's source file"
def ctx_symfolder(
sym, # Symbol to get folder context from
**kwargs):
"Add raw message with folder context for a symbol's source file location"
def ctx_sympkg(
sym, # Symbol to get folder context from
**kwargs):
"Add raw message with repo context for a symbol's root package"
def gist_file(gist_id:str):
"Get the first file from a gist"
def ast_grep(
    pattern:str, # ast-grep pattern to search, e.g "post($A, data=$B, $$$)"
    path:str=".", # path to recursively search for files
    lang:str="python" # language to search/scan
): # JSON output from calling `ast-grep --json=compact` (closing backtick was missing in source)
    """Use `ast-grep` to find code patterns by AST structure (not text).
    Pattern syntax:
    - $VAR captures single nodes, $$$ captures multiple
    - Match structure directly: `def $FUNC($$$)` finds any function; `class $CLASS` finds classes regardless of inheritance
    - DON'T include `:` - it's concrete syntax, not AST structure
    - Whitespace/formatting ignored - matches structural equivalence
    Examples: `import $MODULE` (find imports); `$OBJ.$METHOD($$$)` (find method calls); `await $EXPR` (find await expressions)
    Useful for: Refactoring—find all uses of deprecated APIs or changed signatures; Security review—locate SQL queries, file operations, eval calls; Code exploration—understand how libraries are used across codebase; Pattern analysis—find async functions, error handlers, decorators; Better than regex—handles multi-line code, nested structures, respects syntax"""
def import_string(
    code:str, # Code to import as a module
    name:str # Name of module to create
):
    # The original listing had no body here (a syntax error); this docstring is
    # reconstructed from the parameter comments — TODO confirm against the real source.
    "Import `code` as a module named `name`"
def import_gist(
gist_id:str, # user/id or just id of gist to import as a module
mod_name:str=None, # module name to create (taken from gist filename if not passed)
add_global:bool=True, # add module to caller's globals?
import_wildcard:bool=False, # import all exported symbols to caller's globals
create_msg:bool=False # Add a message that lists usable tools
):
"Import gist directly from string without saving to disk"
def update_gist(gist_id:str, content:str):
    "Update the first file in a gist with new content"
# -- tracetool overview --
# (This prose was fused directly onto the docstring line above in the source — a syntax
# error; preserved here as comments.)
# tracetool is an LLM tool that traces function execution and captures variable snapshots
# after each line runs, using Python 3.12's sys.monitoring for low-overhead tracing.
# For each call to the target function (including recursive calls), the LLM receives a
# stack trace showing how that call was reached, plus a per-line trace dict mapping each
# executed source line to its hit count and variable snapshots. Variables that don't change
# are shown as a single (type, repr) tuple, while those that evolve across iterations
# appear as a list of snapshots.
# This is particularly useful when the LLM needs to understand unfamiliar code by seeing
# exactly what happens step-by-step, debug issues by watching how variables change, verify
# loop behavior by confirming iteration counts and accumulator values, or explore recursive
# functions where each call gets its own trace entry.
# -- dialoghelper.capture --
# Screen capture
def capture_tool(timeout:int=15):
"Capture the screen. Re-call this function to get the most recent screenshot, as needed. Use default timeout where possible"# -- dialoghelper.tracetools --
# Trace function execution
def tracetool(
sym: str, # Dotted symbol path of callable to run
args: list=None, # Positional args for callable (JSON values passed directly)
kwargs: dict=None, # Keyword args for callable (JSON values passed directly)
target_func: str=None # Dotted symbol path of function to trace (defaults to sym)
)->list[tuple[str, dict[str, tuple[int, dict[str, tuple|list]]]]]: # List of (stack_str,trace_dict); trace_dict maps source snippets to (hit_count, variables), unchanged vars collapsed to single tuple
"""
Trace execution using sys.monitoring (Python 3.12+), returning a list of per-call traces.
Return:
- list of length <= 10
- one element per call to `target_func` (including recursion)
- each element is: (stack_str, trace_dict)
stack_str: call stack string (filtered so `fn` is the shallowest frame shown)
trace_dict: {
"<source snippet for AST-line>": ( hit_count, { "var": [ (type_name, truncated_repr), ... up to 10 ], ... } ),
...}
Semantics:
- "Line" means an AST-level line: separate statements (even if on one physical line via `;`).
- Compound statements are keyed by their header only.
- Unchanged variables → `('type', 'repr')` tuple, changed variables → `[('type', 'repr'), ...]` list.
- Comprehensions are treated as a line node and are monitored, including inside the comprehension frame, with per-iteration snapshots.
- Snapshots are recorded after each line finishes, so assignments show updated values.
"""# -- dialoghelper.tmux --
# Terminal : view tmux buffers
def pane(
n:int=None, # Number of scrollback lines to capture, in addition to visible area (None uses default_tmux_lines, which is 500 if not otherwise set)
pane:int=None, # Pane number to capture from
session:str=None, # Session name to target
window:int=None, # Window number to target
**kwargs
):
'Grab the tmux history in plain text'
def list_panes(
session:str=None, # Session name to list panes from
window:int=None, # Window number to list panes from
**kwargs
):
'List panes for a session/window (or current if none specified)'
def panes(
session:str=None, # Session name to target
window:int=None, # Window number to target
n:int=None, # Number of scrollback lines to capture
**kwargs
):
'Grab history from all panes in a session/window'
def list_windows(
session:str=None, # Session name to list windows from
**kwargs
):
'List all windows in a session'
def windows(
session:str=None, # Session name to target
n:int=None, # Number of scrollback lines to capture
**kwargs
):
'Grab history from all panes in all windows of a session'
def list_sessions(**kwargs):
'List all tmux sessions'
def sessions(
n:int=None, # Number of scrollback lines to capture
**kwargs
):
'Grab history from all panes in all windows of all sessions'# -- contextkit.read --
def read_text(url, # URL to read
): # Text from page
"Get text from `url`"
def read_link(url: str, # URL to read
heavy: bool = False, # Use headless browser (requires extra setup steps before use)
sel: Optional[str] = None, # Css selector to pull content from
useJina: bool = False, # Use Jina for the markdown conversion
ignore_links: bool = False, # Whether to keep links or not
):
"Reads a url and converts to markdown"
def read_gist(url:str # gist URL, of gist to read
):
"Returns raw gist content, or None"
def read_gh_file(url:str # GitHub URL of the file to read
):
"Reads the contents of a file from its GitHub URL"
def read_file(path:str):
"returns file contents"
def read_dir(path: str, # path to read
unicode_only: bool = True, # ignore non-unicode files
included_patterns: List[str] = ["*"], # glob pattern of files to include
excluded_patterns: List[str] = [".git/**"], # glob pattern of files to exclude
verbose: bool = False, # log paths of files being read
as_dict: bool = False # returns dict of {path,content}
) -> Union[str, Dict[str, str]]: # returns string with contents of files read
"""Reads files in path, returning a dict with the filenames and contents if as_dict=True, otherwise concatenating file contents into a single string. Takes optional glob patterns for files to include or exclude."""
def read_pdf(file_path: str # path of PDF file to read
) -> str:
"Reads the text of a PDF with PdfReader"
def read_google_sheet(url: str # URL of a Google Sheet to read
):
"Reads the contents of a Google Sheet into text"
def read_gdoc(url: str # URL of Google Doc to read
):
"Gets the text content of a Google Doc using html2text"
def read_arxiv(url:str, # arxiv PDF URL, or arxiv abstract URL, or arxiv ID
save_pdf:bool=False, # True, will save the downloaded PDF
save_dir:str='.' # directory in which to save the PDF
):
"Get paper information from arxiv URL or ID, optionally saving PDF to disk"
def read_gh_repo(path_or_url:str, # Repo's GitHub URL, or GH SSH address, or file path
as_dict:bool=True, # if True, will return repo contents {path,content} dict
verbose:bool=False # if True, will log paths of files being read
):
"Repo contents from path, GH URL, or GH SSH address"# -- llms-txt --
def parse_llms_file(txt):
"Parse llms.txt file contents in `txt` to an `AttrDict`"
def llms_txt2ctx(
    fname:str, # File name to read
    optional:bool=False, # Include 'optional' section? (source annotated this `bool_arg`, not defined in view — presumably a CLI bool-parsing shim; plain `bool` used here, TODO confirm)
    n_workers:int=None, # Number of threads to use for parallel downloading
    save_nbdev_fname:str=None # save output to nbdev `{docs_path}` instead of emitting to stdout
):
    "Print a `Project` with a `Section` for each H2 part in file read from `fname`, optionally skipping the 'optional' section."
def read_page(url, pause=50, timeout=5000, stealth=False, page=None):
"Return contents of `url` and its iframes using Playwright"
def url2md(url, sel=None, pause=50, timeout=5000, stealth=False, page=None):
"Read `url` with `read_page`"
def get2md(url, sel=None, **kwargs):
"Read `url` with `httpx.get`"# -- toolslm.xml --
def read_file(fname, max_size=None, sigs_only=False, **kwargs):
"Read file content, converting notebooks to XML if needed"
def files2ctx(
fnames:list[Union[str,Path]], # List of file names to add to context
srcs:Optional[list]=None, # Use the labels instead of `fnames`
max_size:int=None, # Skip files larger than this (bytes)
out:bool=True, # Include notebook cell outputs?
ids:bool=True, # Include cell ids in notebooks?
nums:bool=False, # Include line numbers in notebook cell source?
sigs_only:bool=False, # Only include signatures and docstrings (where supported by `codesigs` lib)
**kwargs
)->str: # XML for LM context
"Convert files to XML context, handling notebooks"
def folder2ctx(
path:Union[str,Path], # Folder to read
prefix:bool=False, # Include Anthropic's suggested prose intro?
out:bool=True, # Include notebook cell outputs?
include_base:bool=True, # Include full path in src?
title:str=None, # Optional title attr for Documents element
max_size:int=100_000, # Skip files larger than this (bytes)
max_total:int=10_000_000, # Max total output size in bytes
readme_first:bool=False, # Prioritize README files at start of context?
files_only:bool=False, # Return dict of {filename: size} instead of context?
sigs_only:bool=False, # Return signatures instead of full text? (where supported by `codesigs` lib)
ids:bool=True, # Include cell ids in notebooks?
**kwargs
)->Union[str,dict]:
"Convert folder contents to XML context, handling notebooks"
def sym2file(sym):
"Return md string with filepath and contents for a symbol's source file"
def sym2folderctx(
sym,
types:str|list='py', # List or comma-separated str of ext types from: py, js, java, c, cpp, rb, r, ex, sh, web, doc, cfg
skip_file_re:str=r'^_mod', # Skip files matching regex
**kwargs
):
"Return folder context for a symbol's source file location"
def sym2pkgpath(sym):
"Get root package path for a symbol"
def sym2pkgctx(
sym,
types:str|list='py', # List or comma-separated str of ext types from: py, js, java, c, cpp, rb, r, ex, sh, web, doc, cfg
skip_file_re:str=r'^_mod', # Skip files matching regex
skip_folder_re:str=r'^(\.|__)', # Skip folders matching regex
**kwargs
):
"Return contents of files in a symbol's root package"
def repo2ctx(
owner:str, # GitHub repo owner or "owner/repo" or a full github URL
repo:str=None, # GitHub repo name (leave empty if using "owner/repo" or URL format for owner param)
ref:str=None, # Git ref (branch/tag/sha) (taken from the URL if not provided); defaults to repo's default branch
folder:str=None, # Only include files under this path (taken from the URL if not provided)
show_filters:bool=True, # Include filter info in title?
token:str=None, # GitHub token (uses GITHUB_TOKEN env var if None)
**kwargs # Passed to `folder2ctx`
)->Union[str,dict]: # XML for LM context, or dict of file sizes
"Convert GitHub repo to XML context without cloning"# -- toolslm.inspecttools --
def importmodule(
mod: str, # The module to import (e.g. 'torch.nn.functional')
caller_symbol:str = '__msg_id' # The name of the special variable to find the correct caller namespace
): # NOTE(review): imports for its side effect on the caller's globals; return value not documented — confirm
"""Import a module into the caller's global namespace so it's available for `symsrc`, `symval`, `symdir`, etc.
Use this before inspecting or using symbols from modules not yet imported."""
def symsrc(
sym: str # Dotted symbol path (e.g `Interval` or `sympy.sets.sets.Interval`) or "_last" for previous result
):
"""Get the source code for a symbol.
Examples:
- `symsrc("Interval")` -> source code of Interval class if it's already imported
- `symsrc("sympy.sets.sets.Interval")` -> source code of Interval class
- `symsrc("_last")` -> source of object from previous tool call
- For dispatchers or registries of callables: `symnth("module.dispatcher.funcs", n)` then `symsrc("_last")`"""
def symtype(
syms: str # Comma separated str list of dotted symbol paths (e.g `'Interval,a'` or `'sympy.sets.sets.Interval'`); "_last" for prev result
):
"""Get the type of a symbol and set `_last`.
Examples:
- `symtype("sympy.sets.sets.Interval")` -> `<class 'type'>`
- `symtype("doesnotexist")` -> `'SymbolNotFound'`
- `symtype("_last")` -> type of previous result"""
def symval(
syms: str # Comma separated str list of dotted symbol paths (e.g `Interval` or `sympy.sets.sets.Interval`); "_last" for prev result
): # returns one repr per requested symbol, in order
"""List of repr of symbols' values.
Examples:
- `symval("sympy.sets.sets.Interval")` -> `[<class 'sympy.sets.sets.Interval'>]`
- `symval("some_dict.keys")` -> `[dict_keys([...])]`
- `symval("a,notexist")` -> `['foo','SymbolNotFound']`"""
def symdir(
sym: str, # Dotted symbol path (e.g `Interval` or `sympy.sets.sets.Interval`) or "_last" for previous result
exclude_private: bool=False # Filter out attrs starting with "_"
): # result is also stored in `_last` for chaining
"""Get dir() listing of a symbol's attributes and set `_last`. E.g: `symdir("sympy.Interval")` -> `['__add__', '__and__', ...]`"""
def symnth(
sym: str, # Dotted symbol path to a dict or object with .values()
n: int # Index into the values (0-based)
): # picks values()[n]; useful for registries whose entries have no dotted path
"""Get the nth value from a dict (or any object with .values()). Sets `_last` so you can chain with `symsrc("_last")` etc.
Examples:
- `symnth("dispatcher.funcs", 12)` -> 13th registered function
- `symnth("dispatcher.funcs", 0); symsrc("_last")` -> source of first handler"""
def symlen(
sym: str # Dotted symbol path or "_last" for previous result
): # len() of the resolved value
"Returns the length of the given symbol"
def symslice(
sym: str, # Dotted symbol path or "_last" for previous result
start: int, # Starting index for slice
end: int # Ending index for slice
): # NOTE(review): whether `end` is inclusive or exclusive is not documented — confirm before relying on it
"Returns the contents of the symbol from the given start to the end."
def symsearch(
sym:str, # Dotted symbol path or "_last" for previous result
term:str, # Search term (exact string or regex pattern)
regex:bool=True, # If True, regex search; if False, exact match
flags:int=0 # Regex flags (e.g., re.IGNORECASE)
): # return shape differs by mode — see docstring
"""Search contents of symbol, which is assumed to be str for regex, or iterable for non-regex.
Regex mode returns (match, start, end) tuples; otherwise returns (item, index) tuples"""
def symset(
val: str # Value to assign to _ai_sym
): # NOTE(review): _ai_sym presumably backs the "_last" mechanism used by the other sym* tools — confirm
"Set _ai_sym to the given value"
def symfiles_folder(
sym:str, # Dotted symbol path or "_last" for previous result
**kwargs # Passed through to context creation — TODO confirm target (presumably `folder2ctx`)
):
"Return XML context of files in the folder containing `sym`'s definition"
def symfiles_package(
sym:str, # Dotted symbol path or "_last" for previous result
**kwargs # Passed through to context creation — TODO confirm target (presumably `folder2ctx`)
):
"Return XML context of all files in `sym`'s top-level package"# -- toolslm.funccall --
def get_schema( # build a tool-definition schema from a callable or dict
f:Union[callable,dict], # Function to get schema for
pname='input_schema', # Key name for parameters
evalable=False, # stringify defaults that can't be literal_eval'd?
skip_hidden=False # skip parameters starting with '_'?
)->dict: # {'name':..., 'description':..., pname:...}
"Generate JSON schema for a class, function, or method"
def mk_tool(dispfn, tool): # tool: JSON schema tool definition; dispfn: dispatch callable the generated function routes to — TODO confirm dispfn role
"Create a callable function from a JSON schema tool definition"
def call_func(fc_name, fc_inputs, ns, raise_on_err=True): # fc_name: function name; fc_inputs: its arguments; ns: namespace to resolve the name in; raise_on_err: re-raise instead of returning the error?
"Call the function `fc_name` with the given `fc_inputs` using namespace `ns`."
async def call_func_async(fc_name, fc_inputs, ns, raise_on_err=True): # async counterpart of `call_func` with the same parameters
"Awaits the function `fc_name` with the given `fc_inputs` using namespace `ns`."# -- toolslm.funccall --
def python(
code:str, # Code to execute
glb:Optional[dict]=None, # Globals namespace
loc:Optional[dict]=None, # Locals namespace
timeout:int=3600 # Maximum run time in seconds
):
"Executes python `code` with `timeout`, returning the final expression (similar to IPython)."# -- toolslm.download --
# Fetch web content for LLMs
def read_md(url, rm_comments=True, rm_details=True, **kwargs): # rm_comments/rm_details: strip HTML comments and <details> tags (same flags as `read_html`)
"Read text from `url` and clean with `clean_docs`"
def html2md(s:str, ignore_links=True): # ignore_links: drop hyperlinks during the HTML->markdown conversion
"Convert `s` from HTML to markdown"
def read_html(url, # URL to read
sel=None, # Read only outerHTML of CSS selector `sel`
rm_comments=True, # Removes HTML comments
rm_details=True, # Removes `<details>` tags
multi=False, # Get all matches to `sel` or first one
wrap_tag=None, # If multi, each selection wrapped with <wrap_tag>content</wrap_tag>
ignore_links=True, # Drop hyperlinks during the markdown conversion
): # Cleaned markdown
"Get `url`, optionally selecting CSS selector `sel`, and convert to clean markdown"
def get_llmstxt(url, optional=False, n_workers=None): # optional: include the 'optional' section; n_workers: parallel URL fetches
"Get llms.txt file from `url` and expand it with `llms_txt.create_ctx()`"
def find_docs(url): # NOTE(review): behavior when no docs exist ("If available") is not documented — presumably returns None; confirm
"If available, return LLM-friendly llms.txt context or markdown file location from `url`"
def read_docs(url, optional=False, n_workers=None, rm_comments=True, rm_details=True): # combines `find_docs` discovery flags with `read_md`-style cleaning flags
"If available, return LLM-friendly llms.txt context or markdown file response for `url`"# -- toolslm.shell --
# toolslm.shell Minimal IPython shell for code execution get_shell, run_cell
def get_shell()->TerminalInteractiveShell: # minimal = no logging, history, or automagic
"Get a `TerminalInteractiveShell` with minimal functionality"
def run_cell(self:TerminalInteractiveShell, cell, timeout=None): # patched onto TerminalInteractiveShell (note the explicit `self` parameter)
"Wrapper for original `run_cell` which adds timeout and output capture"# -- ipykernel --
def scrape_url(url): # URL to fetch
"Get the html content of a web page using the cloudscraper library to bypass Cloudflare's anti-bot page."
return create_scraper().get(url) # returns the full response object (callers read .text and .headers), not just the HTML string
def read_url(url: str, as_md: bool = True, extract_section: bool = True, selector: str = None, math_mode: str = None):
"""This function extracts a web page information for LLM ingestion
1. Downloads a web page
2. Parses HTML
3. Optionally extracts a specific section (fragment or CSS selector)
4. Converts MathML → LaTeX
5. Optionally converts HTML → Markdown
6. Convert code sections to fenced markdown blocks
7. Makes image URLs absolute
8. Returns the processed text
"""
o = scrape_url(url) # fetch via cloudscraper (bypasses Cloudflare anti-bot page)
res, ctype = o.text, o.headers.get('content-type').split(';')[0] # NOTE(review): if the content-type header is absent, .get() returns None and .split raises — confirm upstream always sets it
soup = BeautifulSoup(res, 'lxml')
if selector: # explicit CSS selector takes precedence over fragment extraction
res = '\n\n'.join(str(s) for s in soup.select(selector)) # concatenate every match
elif extract_section:
parsed = urlparse(url)
if parsed.fragment: # URL has a #fragment: extract just that section
section = soup.find(id=parsed.fragment)
if section:
elements = [section]
current = section.next_sibling
while current: # collect siblings until the next element of the same tag (start of the following section)
if hasattr(current, 'name') and current.name == section.name: break
elements.append(current)
current = current.next_sibling
res = ''.join(str(el) for el in elements)
else:
res = '' # fragment id not found on the page
else:
res = str(soup) # no fragment: keep the whole document
if math_mode:
res_soup = BeautifulSoup(res, 'lxml')
_convert_math(res_soup, math_mode) # MathML -> LaTeX, mutates res_soup in place
res = str(res_soup)
if as_md and ctype == 'text/html': # only convert when the server actually sent HTML
h = HTML2Text()
h.body_width = 0 # disable hard line-wrapping
# Handle code blocks
h.mark_code = True # emit [code]...[/code] markers so they can be re-fenced below
res = h.handle(res)
def _f(m): return f'```\n{dedent(m.group(1))}\n```'
res = re.sub(r'\[code]\s*\n(.*?)\n\[/code]', _f, res or '', flags=re.DOTALL).strip() # rewrite markers as fenced markdown blocks
# Handle image urls
res = _absolutify_imgs(res, urljoin(url, s['href'] if (s := soup.find('base')) else '')) # NOTE(review): a <base> tag without an href attribute would raise KeyError — confirm
# Handle math blocks
if math_mode == 'safe':
res = res.replace('\\\\(', '\\(').replace('\\\\)', '\\)') # undo double-escaped inline math delimiters from the markdown pass
return res
Solveit tool exploration
Here is the list of unique Answerai library names sorted in alphabetical order:
Libraries providing tools
| Library | GitHub Repo URL | Library Description |
|---|---|---|
| contextkit | * https://github.com/AnswerDotAI/ContextKit | * Useful LLM contexts ready to be used in AIMagic. Library for gathering context from various sources (URLs, GitHub repos, Google Docs, PDFs, arXiv) for feeding to LLMs. |
| dialoghelper | * https://github.com/AnswerDotAI/dialoghelper | * Helper functions for solveit dialogs. Provides functions for message manipulation, gist management, screen capture, and tool integration in the solveit environment. |
| execnb | * https://github.com/AnswerDotAI/execnb | * Execute a jupyter notebook, fast, without needing jupyter. Provides CaptureShell for running code and capturing outputs without a Jupyter server. |
| fastcore | * https://github.com/AnswerDotAI/fastcore | Python utilities and enhancements used across fast.ai projects. Includes foundational tools like @patch, @delegates, type dispatch, and LLM file/bash tools. |
| ipykernel_helper | PRIVATE REPO | Helper utilities for IPython/Jupyter kernels, providing read_url, transient display, enhanced completion, and namespace inspection for dialog environments. |
| llms_txt | * https://github.com/AnswerDotAI/llms-txt | * The llms.txt specification is open for community input. A GitHub repository hosts this informal overview. Tools for the /llms.txt standard helping language models use websites. |
| playwrightnb | https://github.com/AnswerDotAI/playwrightnb | Playwright browser automation integration for Jupyter notebooks, enabling headless browser control and web scraping. |
| toolslm | * https://github.com/AnswerDotAI/toolslm | * Tools to make language models a bit easier to use. Provides XML context creation, function schema generation, shell execution, and web content downloading for LLMs. |
Summary: Answer.AI Tools for LLM Development
LLM Tool Utilities
| Library | Purpose | Key Functions |
|---|---|---|
| fastcore.tools | File/bash operations designed for LLM tool loops | view, create, insert, str_replace, strs_replace, replace_lines, run_cmd, rg, sed |
| toolslm.funccall | Function calling / tool use | get_schema, call_func, call_func_async, python, mk_ns |
Context Preparation
| Library | Purpose | Key Functions |
|---|---|---|
| contextkit | Gather context from various sources | read_link, read_gh_repo, read_gh_file, read_gist, read_google_sheet, read_gdoc, read_dir, read_pdf, read_arxiv |
| toolslm.xml | Convert files/folders to XML for LLMs | folder2ctx, files2ctx, json_to_xml, docs_xml, nb2xml |
| toolslm.download | Fetch web content for LLMs | read_html, read_md, html2md, get_llmstxt, find_docs, read_docs |
| toolslm.md_hier | Parse markdown hierarchically | create_heading_dict, HeadingDict |
Message Formatting
| Library | Purpose | Key Functions |
|---|---|---|
| msglm | Create properly formatted messages for LLM APIs | mk_msg, mk_msgs, mk_msg_anthropic, mk_msg_openai, mk_ant_doc (supports text, images, PDFs, caching) |
Code Execution
| Library | Purpose | Key Functions |
|---|---|---|
| execnb | Execute Jupyter notebooks without Jupyter | CaptureShell, CaptureShell.run(), CaptureShell.execute(), read_nb, write_nb, new_nb |
| nbformat | Notebook format handling & validation | read, write, validate, format conversion |
| toolslm.shell | Minimal IPython shell for code execution | get_shell, run_cell |
Dialog/Session Management
| Library | Purpose | Key Functions |
|---|---|---|
| dialoghelper.core | Interact with solveit dialogs | read_msg, add_msg, update_msg, del_msg, run_msg, find_msgs, import_gist, url2note, ast_grep |
| dialoghelper.capture | Screen sharing with AI | setup_share, start_share, capture_screen, capture_tool |
Visual Overview
┌─────────────────────────────────────────────────────────────────┐
│ LLM Application Stack │
├─────────────────────────────────────────────────────────────────┤
│ USER INTERFACE │
│ dialoghelper (dialog management, screen capture) │
├─────────────────────────────────────────────────────────────────┤
│ MESSAGE LAYER │
│ msglm (format messages for Claude/OpenAI/etc) │
├─────────────────────────────────────────────────────────────────┤
│ CONTEXT LAYER │
│ contextkit (read from URLs, GitHub, Google, PDFs, arXiv) │
│ toolslm.xml (convert files/folders to XML) │
│ toolslm.download (fetch & clean web content) │
│ toolslm.md_hier (parse markdown structure) │
├─────────────────────────────────────────────────────────────────┤
│ TOOL EXECUTION LAYER │
│ fastcore.tools (file editing, bash commands) │
│ toolslm.funccall (function schemas, safe execution) │
│ toolslm.shell (IPython execution) │
│ execnb (notebook execution) │
├─────────────────────────────────────────────────────────────────┤
│ FORMAT LAYER │
│ nbformat (notebook format handling) │
└─────────────────────────────────────────────────────────────────┘
Key Themes
- Safety: Tools return errors as strings for LLMs to debug; file operations have safeguards
- LLM-Optimized Formats: XML for Claude, proper message structures for each API
- Single-Function Access: Most operations are one function call (e.g.,
read_gh_repo,folder2ctx) - Composability: Libraries work together - contextkit gathers content, toolslm formats it, msglm packages it for APIs
Details of the tools available in these libraries
contextkit
All functions follow the same design pattern: single required argument (location), optional parameters for customization, and return text or dict suitable for LLM consumption.
Web Content Functions
read_link(url, heavy=False, sel=None, useJina=False, ignore_links=False) - Reads a URL and converts to markdown - The heavy argument allows you to do a heavy scrape with a contactless browser using playwrightnb - sel - CSS selector to extract specific content - useJina=True - uses Jina.ai service for markdown conversion - ignore_links - whether to strip out links
read_url(...) - Deprecated alias for read_link()
read_text(url) - Get raw text from a URL (no markdown conversion)
read_html(url, sel=None, ...) - Fetch URL, optionally select CSS elements, convert to clean markdown
GitHub Functions
read_gist(url) - Returns raw gist content from a GitHub gist URL
read_gh_file(url) - Reads contents of a single file from its GitHub URL
read_gh_repo(path_or_url, as_dict=False, verbose=False) - Reads entire repo contents from GitHub URL, SSH address, or local path - Clones/caches repos automatically - Returns dict of {filepath: content} if as_dict=True, otherwise concatenated string
Google Services Functions
read_google_sheet(url) - Reads a Google Sheet into CSV text
read_gdoc(url) - Gets text content of a Google Doc converted to markdown
File System Functions
read_file(path) - Returns file contents as string
read_dir(path, unicode_only=True, included_patterns=["*"], excluded_patterns=[".git/**"], verbose=False, as_dict=False) - Reads files in a directory with glob pattern filtering - unicode_only=True - skip binary files - included_patterns - glob patterns to include - excluded_patterns - glob patterns to exclude - Returns dict or concatenated string with file markers
read_pdf(file_path) - Extracts text from PDF using pypdf
Academic Functions
read_arxiv(url, save_pdf=False, save_dir='.') - Get paper info from arXiv URL or ID - Returns dict with title, authors, summary, published date, links - Optionally downloads PDF and extracts LaTeX source - Returns structured metadata perfect for LLM context
dialoghelper
dialoghelper is a library from Answer.AI providing helper functions for the solveit dialog environment. It enables programmatic interaction with dialog cells, message editing, gist management, and screen capture.
Module 1: core (dialoghelper.core)
The main module with functions for interacting with solveit dialogs:
Basics - Variable/Context Management
| Function | Description |
|---|---|
find_var(var) |
Search for variable in all frames of the call stack |
set_var(var, val) |
Set variable value after finding it in call stack frames |
find_msg_id() |
Get current message ID from call stack (__msg_id) |
find_dname() |
Get current dialog name from call stack (__dialog_id) |
call_endp(path, ...) |
Call a solveit API endpoint |
curr_dialog(with_messages, dname) |
Get current dialog info |
msg_idx(msgid, dname) |
Get absolute index of message in dialog |
JavaScript/HTML Injection
| Function | Description |
|---|---|
add_scr(scr, oob) |
Swap a script element into the DOM |
iife(code) |
Wrap JavaScript code in IIFE and execute via add_html |
add_html(content, dname) |
Send HTML to browser to be swapped into DOM (uses hx-swap-oob) |
Event System
| Function | Description |
|---|---|
pop_data(idx, timeout) |
Pop data from a queue |
fire_event(evt, data) |
Fire a browser event |
event_get(evt, timeout, data) |
Fire event and wait for response data |
View/Edit Dialog Messages
| Function | Description |
|---|---|
find_msgs(re_pattern, msg_type, limit, ...) |
Find messages matching regex/type in current dialog |
read_msg(n, relative, msgid, view_range, nums, ...) |
Get message by index (absolute or relative) with optional line range |
add_msg(content, placement, msgid, msg_type, ...) |
Add a new message (note/code/prompt) at specified position |
del_msg(msgid, dname) |
Delete a message |
update_msg(msgid, msg, content, ...) |
Update an existing message’s content/properties |
run_msg(msgid, dname) |
Queue a message for execution |
url2note(url, ...) |
Read URL as markdown and add as note(s) below current message |
AST/Code Search
| Function | Description |
|---|---|
ast_py(code) |
Get an ast-grep SgRoot node for Python code |
ast_grep(pattern, path, lang) |
Use ast-grep to find pattern in files |
Text Edit Functions (for editing message content)
| Function | Description |
|---|---|
msg_insert_line(msgid, insert_line, new_str, ...) |
Insert text at specific line number |
msg_str_replace(msgid, old_str, new_str, ...) |
Replace first occurrence of string |
msg_strs_replace(msgid, old_strs, new_strs, ...) |
Replace multiple strings simultaneously |
msg_replace_lines(msgid, start_line, end_line, new_content, ...) |
Replace a range of lines |
Gist Management
| Function | Description |
|---|---|
load_gist(gist_id) |
Retrieve a GitHub gist |
gist_file(gist_id) |
Get the first file from a gist |
import_string(code, name) |
Import code string as a module |
is_usable_tool(func) |
Check if function can be used as LLM tool (has docstring + typed params) |
mk_toollist(syms) |
Create markdown list of tools with & notation |
import_gist(gist_id, mod_name, ...) |
Import gist directly as module, optionally wildcard import |
Tool Info
| Function | Description |
|---|---|
tool_info() |
Get information about available tools |
fc_tool_info() |
Get function-calling tool information |
Module 2: capture (dialoghelper.capture)
Screen capture functionality for sharing screens with the AI:
| Function | Description |
|---|---|
setup_share() |
Setup screen sharing (initializes the capture system) |
start_share() |
Start screen sharing session |
capture_screen(timeout=15) |
Capture the screen as a PIL image |
capture_tool(timeout=15) |
Capture screen for LLM tool use - returns image data suitable for vision models |
dialoghelper is the infrastructure that powers the solveit interactive environment, enabling: - Dialog manipulation: Add, edit, delete, search messages programmatically - Code execution: Queue and run code cells - Text editing: Line-based editing operations on message content - Gist integration: Import tools/modules directly from GitHub gists - Screen capture: Share your screen with the AI for visual understanding - AST search: Search code using ast-grep patterns
execnb
execnb is a fast.ai library for executing Jupyter notebooks without needing a Jupyter server or even having Jupyter installed.
Core execution:
- CaptureShell - runs Jupyter code and captures notebook outputs
- CaptureShell.run() - executes code strings and returns outputs
- CaptureShell.execute() - executes a notebook and saves it with outputs filled in
fastcore
fastcore provides helpful tools for running CLI commands and reading, modifying, and creating files in Python, primarily for AIs in tool loops for automating tasks involving the filesystem.
Bash Tools
run_cmd(cmd, argstr='', disallow_re=None, allow_re=None) - Run cmd passing split argstr, optionally checking for allowed argstr - Can include regex patterns to allow/disallow certain argument patterns (e.g., blocking parent directories)
rg(argstr, disallow_re=None, allow_re=None) - Runs the ripgrep command for fast text searching across files - Supports regex-based argument filtering for safety
sed(argstr, disallow_re=None, allow_re=None) - Runs sed command for reading sections of files - Useful for viewing specific line ranges with or without line numbers
Text Edit Tools
Python implementations of the text editor tools from Anthropic. These tools are especially useful in an AI’s tool loop.
view(path, view_range=None, nums=False) - View directory or file contents with optional line range and numbers - Can specify 1-indexed line ranges and toggle line numbers
create(path, file_text, overwrite=False) - Creates a new file with the given content at the specified path - Safety feature: won’t overwrite by default
insert(path, insert_line, new_str) - Insert new_str at specified line number - Uses 0-based indexing
str_replace(path, old_str, new_str) - Replace first occurrence of old_str with new_str in file
strs_replace(path, old_strs, new_strs) - Replace for each str pair in old_strs, new_strs - Batch replacement of multiple string pairs
replace_lines(path, start_line, end_line, new_content) - Replace lines in file using start and end line-numbers (index starting at 1)
Special LLM-Friendly Features
These tools have special behavior around errors. Since these have been specifically designed for work with LLMs, any exceptions created from their use are returned as strings to help them debug their work. These tools are designed to be safe, predictable, and easy for LLMs to use in agentic workflows.
llms_txt
The llms-txt library provides a CLI and Python API to parse llms.txt files and create XML context files from them.
* * The llms_txt Python module provides the source code and helpers needed to create and use llms.txt files.
The llms-txt library provides: 1. Parsing - Convert llms.txt markdown files into structured Python data 2. XML Generation - Transform parsed data into LLM-friendly XML context (especially for Claude) 3. CLI Tool - Command-line utility for batch processing 4. URL Fetching - Automatically fetch and include content from linked URLs Core Parsing Functions
parse_llms_file(txt, optional=True) - * * Parse llms.txt file contents to create a data structure with the sections of an llms.txt file - * Returns an AttrDict with keys: title, summary, info, sections - optional=True - includes the optional section in the parsed output - Example: parsed.sections.Examples returns list of links in the Examples section
parse_link(txt) - * Extracts the title, URL, and optional description from a markdown link - Returns dict with title, url, and desc keys
Internal: _parse_llms(txt) - Lower-level parser that splits the file into start section and sectioned links - Returns tuple of (start_text, sections_dict)
XML Context Generation
mk_ctx(d, optional=True, n_workers=None) - * * Create a Project with a Section for each H2 part in the parsed data, optionally skipping the ‘optional’ section - * For LLMs such as Claude, XML format is preferred - Takes parsed llms.txt data structure and converts to XML - n_workers - parallel workers for fetching URLs
create_ctx(txt, optional=True, n_workers=None) - * Create an LLM context file with XML sections, suitable for systems such as Claude (this is what the CLI calls behind the scenes) - High-level function that parses and converts in one step
get_doc_content(url) - * * Fetch content from local file if in nbdev repo - Helper for retrieving document content from URLs or local paths
Command-Line Interface
llms_txt2ctx (CLI command) - * * After installation, llms_txt2ctx is available in your terminal - Usage: llms_txt2ctx llms.txt > llms.md - * * Pass –optional True to add the ‘optional’ section of the input file - * Uses the parsing helpers to process an llms.txt file and output an XML context file
File Format Specification
* * The llms.txt file spec contains: an H1 with the name of the project (the only required section), a blockquote with a short summary, zero or more markdown sections containing detailed information, and zero or more H2-delimited sections containing “file lists” of URLs
playwrightnb
* * playwrightnb provides quality-of-life helpers for interactive use of Playwright, particularly useful in Jupyter notebooks. It handles JavaScript rendering and other web complexities automatically.
Key Features
- Sync mode in Jupyter - Use Playwright synchronously for interactive exploration
- Automatic iframe handling - Returns both main content and iframe contents in a dict
- JavaScript support - Handles dynamically loaded content automatically
- Stealth mode - Optional bot detection avoidance
- CSS selectors - Extract specific page sections easily
- Markdown conversion - Built-in HTML to markdown conversion
- Flexible timeouts - Configurable pause and timeout parameters
Use cases: - Scraping JavaScript-heavy sites (Discord docs, dynamic help pages) - Extracting content from iframes - Interactive web exploration in notebooks - Quick conversion of web pages to markdown for LLM context
Core Page Functions
get_page(*args, stealth=False, **kwargs) - Get a Playwright page object - stealth=True - uses playwright-stealth to avoid bot detection - Returns an async page that can navigate to URLs
page_ready(page, pause=50, timeout=5000) - Wait until main content of page is ready - pause - milliseconds to wait (default: 50ms) - timeout - maximum wait time in milliseconds (default: 5000ms)
frames_ready(page, pause=50, timeout=5000) - Wait until all visible frames (if any) on page are ready - Handles dynamically loaded iframes
wait_page(page, pause=50, timeout=5000) - Wait until page and visible frames (if any) are ready - Combines page_ready and frames_ready
get_full_content(page) - Returns tuple of (page_content, iframes_dict) - iframes_dict maps iframe IDs to their HTML contents
High-Level Reading Functions
read_page_async(url, pause=50, timeout=5000, stealth=False, page=None) - * Return contents of url and its iframes using Playwright async - Handles JavaScript and dynamic content automatically - Returns tuple of (main_content, iframes_dict)
read_page(url, pause=50, timeout=5000, stealth=False, page=None) - * Return contents of url and its iframes using Playwright (sync version) - Same as read_page_async but synchronous for easier interactive use - Perfect for Jupyter notebooks
HTML to Markdown Conversion
h2md(h) - Convert HTML string to markdown using HTML2Text - Simple utility for converting any HTML to markdown
url2md_async(url, sel=None, pause=50, timeout=5000, stealth=False, page=None) - Read URL with read_page_async, optionally selecting CSS selector - Converts result to markdown - sel - CSS selector to extract specific content
url2md(url, sel=None, pause=50, timeout=5000, stealth=False, page=None) - Read URL with read_page (sync version) - Optionally select CSS selector and convert to markdown - * Great for accessing Discord’s JS-rendered docs or other dynamic content
get2md(url, sel=None, params=None, headers=None, cookies=None, auth=None, proxy=None, follow_redirects=False, verify=True, timeout=5.0, trust_env=True) - * Read URL with httpx.get (no JavaScript rendering) - Faster alternative when you don’t need JS rendering - Supports all standard HTTP options (headers, auth, proxies, etc.) - Optionally select CSS content with sel
toolslm
toolslm provides tools to make language models easier to use, focused on context creation and XML handling:
Context Creation Functions: - folder2ctx - generates XML context from files, useful for providing file contents to LLMs - json_to_xml - converts JSON/dict structures to XML format
Key modules (based on the repo structure): - xml - XML handling utilities - funccall - Function calling support - shell - Shell command integration - download - Download utilities - md_hier - Markdown hierarchy handling
The library is designed to help structure information (especially from codebases and files) into XML format that works well with Claude and other LLMs.
- xml (
toolslm.xml)
Provides functions for converting content to XML format optimized for LLMs (especially Claude/Anthropic):
| Function | Description |
|---|---|
json_to_xml(d, rnm) |
Convert a JSON/dict to XML with a root name |
mk_doctype(content, src) |
Create a named tuple with source and content |
mk_doc(index, content, src) |
Create a single document in Anthropic’s recommended XML format |
docs_xml(docs, srcs, prefix, details) |
Create XML string with multiple documents in Anthropic’s format |
read_file(fname) |
Read file content, converting notebooks to XML if needed |
files2ctx(fnames, prefix) |
Convert multiple files to XML context |
folder2ctx(folder, prefix, **kwargs) |
Generate XML context from all files in a folder |
folder2ctx_cli |
Command-line interface for folder2ctx |
nb2xml(fname) |
Convert a Jupyter notebook to XML |
cell2xml(cell) |
Convert a notebook cell to XML format |
cell2out(o) |
Convert notebook output to XML format |
- funccall (
toolslm.funccall)
Tools for function calling / tool use with LLMs:
| Function | Description |
|---|---|
get_schema(f) |
Generate JSON schema for a class, function, or method (for tool definitions) |
call_func(fc_name, fc_inputs, ns) |
Call a function by name with inputs using a namespace |
call_func_async(fc_name, fc_inputs, ns) |
Async version of call_func |
python(code, glb, loc, timeout) |
Execute Python code with timeout, returning final expression (like IPython) |
mk_ns(fs) |
Create a namespace dict from functions or dicts |
PathArg(path) |
Helper for filesystem path arguments |
- shell (
toolslm.shell)
Provides a minimal IPython shell for code execution:
| Function | Description |
|---|---|
get_shell() |
Get a TerminalInteractiveShell with minimal functionality (no logging, history, automagic) |
run_cell(cell, timeout) |
Patched method to run cells with timeout and output capture |
- download (
toolslm.download)
Functions for fetching and processing web content for LLM consumption:
| Function | Description |
|---|---|
clean_md(text, rm_comments, rm_details) |
Remove HTML comments and <details> sections from markdown |
read_md(url, ...) |
Read markdown from URL and clean it |
html2md(s, ignore_links) |
Convert HTML string to markdown |
read_html(url, sel, multi, wrap_tag, ...) |
Fetch URL, optionally select CSS elements, convert to clean markdown |
get_llmstxt(url, optional, n_workers) |
Get and expand an llms.txt file (LLM-friendly site documentation) |
split_url(url) |
Split URL into base, path, and filename |
find_docs(url) |
Find LLM-friendly docs (llms.txt or markdown) from a URL |
read_docs(url, ...) |
Read and return LLM-friendly documentation for a URL |
- md_hier (
toolslm.md_hier)
Parse markdown into a hierarchical dictionary structure:
| Function/Class | Description |
|---|---|
HeadingDict |
A dict subclass that also stores the original markdown text in .text |
create_heading_dict(text, rm_fenced) |
Parse markdown into nested dict based on heading hierarchy (e.g., result['Section']['Subsection'].text) |
This is useful for navigating and extracting specific sections from markdown documents programmatically.
Summary: toolslm provides a complete toolkit for preparing context for LLMs - converting files/folders to XML, generating function schemas for tool calling, executing code safely, fetching web documentation, and parsing markdown hierarchically.
msglm - Message Creation for LLMs
msglm makes it easier to create messages for language models like Claude and OpenAI GPTs.
Text messages: - mk_msgs - takes a list of strings and an api format (e.g. “openai”) and generates the correct format - mk_msg - creates a single message
API-specific wrappers: - mk_msg_anthropic / mk_msgs_anthropic - for Anthropic - mk_msg_openai / mk_msgs_openai - for OpenAI
Advanced features: - Image chats - pass raw image bytes in a list with your question - Prompt caching - pass cache=True to mk_msg or mk_msgs (Anthropic) - PDF support - pass raw pdf bytes just like image chats (Anthropic beta) - mk_ant_doc - creates document objects for Anthropic citations feature
Summary: toolslm helps structure context (files, XML), while msglm helps format the actual message payloads for different LLM APIs, handling text, images, PDFs, and advanced features like caching and citations.
Libraries using tools
| Library | GitHub Repo URL | Library Description |
|---|---|---|
| claudette | * * * https://github.com/AnswerDotAI/claudette | * Wrapper for Anthropic’s Python SDK. The SDK works well, but is quite low level. Claudette automates pretty much everything that can be automated, whilst providing full control. |
| cosette | * https://github.com/AnswerDotAI/cosette | * Claudette’s sister, a helper for OpenAI GPT. High-level wrapper for OpenAI’s SDK with stateful chat, tool calling, and streaming support. |
| lisette | https://github.com/AnswerDotAI/lisette | LiteLLM helper providing unified access to 100+ LLM providers using the OpenAI API format, with stateful chat, tools, web search, and reasoning support. |
| msglm | https://github.com/AnswerDotAI/msglm | Makes it easier to create messages for LLMs (Claude, OpenAI). Handles text, images, PDFs, prompt caching, and API-specific formatting. |
Development tools
| Library | GitHub Repo URL | Library Description |
|---|---|---|
| fastcaddy | https://github.com/AnswerDotAI/fastcaddy | Python wrapper for Caddy web server integration, simplifying HTTPS setup and reverse proxy configuration for FastHTML apps. |
| fastlite | * https://github.com/AnswerDotAI/fastlite | A thin, expressive wrapper around SQLite for Python, providing easy database operations with minimal boilerplate. |
| fastlucide | https://github.com/AnswerDotAI/fastlucide | Lucide icons integration for FastHTML applications, providing SVG icons as Python components. |
| fasthtml | https://github.com/AnswerDotAI/fasthtml | Modern Python web framework for building HTML applications with HTMX and Starlette, using pure Python for HTML generation. |
| ghapi | https://github.com/AnswerDotAI/ghapi | * A delightful and complete interface to GitHub’s amazing API. Auto-generated Python client covering all GitHub API endpoints. |
| monsterui | * https://github.com/AnswerDotAI/MonsterUI | * MonsterUI is a FastHTML Tailwind-powered UI framework for building beautiful web interfaces with minimal code. |
| nbdev | * https://github.com/AnswerDotAI/nbdev | Literate programming framework that allows writing library code, tests, and documentation in Jupyter Notebooks, then exporting to Python modules. |
| shell_sage | https://github.com/AnswerDotAI/shell_sage | * ShellSage saves sysadmins’ sanity by solving shell script snafus super swiftly. AI-powered shell command helper. |
Tools catalog
Here is the list of tools we want to develop.
Enhance IPython kernel
ipykernel_helper
ipykernel_helper is a library that provides helper utilities for working with IPython/Jupyter kernels.
It appears to be part of the infrastructure that makes the Dialog Engineering environment work smoothly, providing convenient functions that are useful in an interactive coding environment.
IPython InteractiveShell - Complete Feature Summary
InteractiveShell is the core class of IPython, providing an enhanced, interactive Python environment. It’s a singleton class that manages everything from code execution to history, completion, and output formatting.
- Code Execution
| Method | Description |
|---|---|
run_cell(raw_cell, store_history, silent, shell_futures, cell_id) |
Run a complete IPython cell including magics |
run_cell_async(...) |
Async version of run_cell |
run_code(code_obj, result, async_) |
Execute a compiled code object |
run_ast_nodes(nodelist, cell_name, interactivity, compiler, result) |
Run a sequence of AST nodes |
run_line_magic(magic_name, line) |
Execute a line magic like %timeit |
run_cell_magic(magic_name, line, cell) |
Execute a cell magic like %%bash |
safe_execfile(fname, *where, ...) |
Safely execute a .py file |
safe_execfile_ipy(fname, ...) |
Execute .ipy or .ipynb files with IPython syntax |
safe_run_module(mod_name, where) |
Safe version of runpy.run_module() |
- Namespace Management
| Method/Property | Description |
|---|---|
user_ns |
The user’s namespace dictionary |
user_ns_hidden |
Hidden namespace items (not shown to user) |
all_ns_refs |
List of all namespace dictionaries where objects may be stored |
push(variables, interactive) |
Inject variables into user namespace |
del_var(varname, by_name) |
Delete a variable from namespaces |
drop_by_id(variables) |
Remove variables if they match given values |
ev(expr) |
Evaluate Python expression in user namespace |
ex(cmd) |
Execute Python statement in user namespace |
init_user_ns() |
Initialize user-visible namespaces to defaults |
reset(new_session, aggressive) |
Clear all internal namespaces |
reset_selective(regex) |
Clear variables matching a regex pattern |
- Code Completion
| Method | Description |
|---|---|
init_completer() |
Initialize completion machinery |
complete(text, line, cursor_pos) |
Return completed text and list of completions |
set_completer_frame(frame) |
Set the frame for completion context |
set_custom_completer(completer, pos) |
Add a custom completer function |
- Code Transformation
| Method/Property | Description |
|---|---|
transform_cell(raw_cell) |
Transform input cell before parsing (handles %magic, !system, etc.) |
transform_ast(node) |
Apply AST transformations from ast_transformers |
input_transformers_post |
List of string transformers applied after IPython’s own |
ast_transformers |
List of ast.NodeTransformer instances for code modification |
check_complete(code) |
Check if code is ready to execute or needs continuation |
- History Management
| Method/Property | Description |
|---|---|
init_history() |
Setup command history and autosaves |
extract_input_lines(range_str, raw) |
Return input history slices as string |
history_length |
Total length of command history |
history_load_length |
Number of entries loaded at startup |
- Magic System
| Method | Description |
|---|---|
find_magic(magic_name, magic_kind) |
Find and return a magic by name |
find_line_magic(magic_name) |
Find a line magic |
find_cell_magic(magic_name) |
Find a cell magic |
register_magic_function(func, magic_kind, magic_name) |
Expose a function as a magic |
define_macro(name, themacro) |
Define a new macro |
- Object Inspection
| Method | Description |
|---|---|
object_inspect(oname, detail_level) |
Get object info |
object_inspect_mime(oname, detail_level, omit_sections) |
Get object info as mimebundle |
object_inspect_text(oname, detail_level) |
Get object info as formatted text |
find_user_code(target, raw, py_only, ...) |
Get code from history, file, URL, or string |
- Error Handling & Debugging
| Method/Property | Description |
|---|---|
showtraceback(exc_tuple, filename, tb_offset, ...) |
Display exception that just occurred |
showsyntaxerror(filename, running_compiled_code) |
Display syntax error |
showindentationerror() |
Handle IndentationError |
get_exception_only(exc_tuple) |
Get exception string without traceback |
set_custom_exc(exc_tuple, handler) |
Set custom exception handler |
excepthook(etype, value, tb) |
Custom sys.excepthook |
debugger(force) |
Call pdb debugger |
call_pdb |
Control auto-activation of pdb at exceptions |
pdb |
Automatically call pdb after every exception |
xmode |
Switch modes for exception handlers |
- System Interaction
| Method | Description |
|---|---|
system(cmd) |
Call cmd in subprocess, piping stdout/err |
system_piped(cmd) |
Call cmd with piped output |
system_raw(cmd) |
Call cmd using os.system or subprocess |
getoutput(cmd, split, depth) |
Get output from subprocess |
- Expression Evaluation
| Method | Description |
|---|---|
user_expressions(expressions) |
Evaluate dict of expressions, return rich mime-typed display_data |
var_expand(cmd, depth, formatter) |
Expand Python variables in a string |
- Async Support
| Property/Method | Description |
|---|---|
autoawait |
Automatically run await in top-level REPL |
should_run_async(raw_cell, ...) |
Determine if cell should run via coroutine runner |
loop_runner |
Select the loop runner for async code |
- Configuration Properties
| Property | Description |
|---|---|
autocall |
Auto-call callable objects (0=off, 1=smart, 2=full) |
autoindent |
Auto-indent code |
automagic |
Call magics without leading % |
colors |
Color scheme (nocolor, neutral, linux, lightbg) |
cache_size |
Output cache size (default 1000) |
ast_node_interactivity |
Which nodes display output (‘all’, ‘last’, ‘last_expr’, ‘none’, ‘last_expr_or_assign’) |
banner1 / banner2 |
Banner text before/after profile |
display_page |
Show pager content as regular output |
enable_html_pager |
Enable HTML in pagers |
enable_tip |
Show tip on IPython start |
show_rewritten_input |
Show rewritten input for autocall |
sphinxify_docstring |
Enable rich HTML docstrings |
warn_venv |
Warn if running in venv without IPython installed |
Architecture Summary
┌─────────────────────────────────────────────────────────────────────┐
│ InteractiveShell (Singleton) │
├─────────────────────────────────────────────────────────────────────┤
│ NAMESPACES │
│ user_ns, user_ns_hidden, all_ns_refs │
├─────────────────────────────────────────────────────────────────────┤
│ CODE EXECUTION PIPELINE │
│ raw_cell → transform_cell → parse → transform_ast → run_code │
├─────────────────────────────────────────────────────────────────────┤
│ SUBSYSTEMS │
│ • Completer (tab completion) │
│ • History Manager │
│ • Magic System (line/cell magics) │
│ • Display Publisher │
│ • Exception Handler / Debugger │
│ • Input Transformers (magics, shell commands) │
│ • AST Transformers │
├─────────────────────────────────────────────────────────────────────┤
│ EXTENSIONS │
│ • Hooks (set_hook) │
│ • Custom completers (set_custom_completer) │
│ • Custom exception handlers (set_custom_exc) │
│ • Magic functions (register_magic_function) │
└─────────────────────────────────────────────────────────────────────┘
How ipykernel_helper Extends InteractiveShell
The ipykernel_helper library uses fastcore’s @patch to add methods to InteractiveShell:
| Added Method | Purpose |
|---|---|
user_items() |
Get user-defined vars & functions (filtered) |
get_vars(vs) |
Get specific variable values |
get_schemas(fs) |
Get JSON schemas for functions (tool calling) |
ranked_complete(code, ...) |
Enhanced completion with ranking |
sig_help(code, ...) |
Signature help using Jedi |
xpush(**kw) |
Push with kwargs |
publish(data, ...) |
Enhanced display publishing |
This is how solveit extends IPython’s core to support AI-assisted coding!
Extract information from the user’s namespace
These are methods patched onto IPython’s InteractiveShell by ipykernel_helper.
Getting an InteractiveShell instance from a notebook code cell is straightforward.
from IPython import get_ipython
shell = get_ipython()
They help extract information from the user’s namespace for use in dialog/AI contexts.
InteractiveShell.user_items(self, max_len=200, xtra_skip=())Purpose: Get user-defined variables and functions from the IPython namespace, filtering out system/internal items.
Returns: Tuple of (user_vars, user_fns) - user_vars: Dict of {var_name: repr_string} - variable names and their string representations (truncated to max_len) - user_fns: Dict of {func_name: signature_string} - user-defined function names and their signatures
What it filters out: - Hidden namespace items (user_ns_hidden) - Items starting with _ - Types, modules, methods, builtins, typing constructs - Specific names like nbmeta, receive_nbmeta
Use case: Let the AI know what variables and functions the user has defined in their session.
InteractiveShell.get_vars(self:InteractiveShell, vs:list, literal=True)Purpose: Retrieve specific variable values from the namespace
Parameters: - vs: List of variable names to retrieve - literal: If True, try to return actual Python literals; if False, return string representation
Returns: - Dict of {var_name: value} for variables that exist in the namespace
The literal=True behavior: - Tries to repr() the value, then literal_eval() it back - If that succeeds, returns the actual value (works for dicts, lists, strings, numbers, etc.) - If it fails (complex objects), returns the string representation instead
Use case: When the user shares specific variables with the AI using $varname notation, this function retrieves their current values.
InteractiveShell.get_schemas(self:InteractiveShell, fs:list)Purpose: Get JSON schemas for functions (for LLM tool calling).
Parameters: - fs: List of function names to get schemas for
Returns: Dict of {func_name: schema_or_error} - If successful: {‘type’: ‘function’, ‘function’: {JSON schema}} - If not found: “funcname not found. Did you run it?” - If schema error: “funcname: error message”
The schema format (via toolslm.funccall.get_schema):
{
'type': 'function',
'function': {
'name': 'my_func',
'description': 'Docstring here',
'parameters': {
'type': 'object',
'properties': {
'param1': {'type': 'string', 'description': '...'},
'param2': {'type': 'integer', 'description': '...'}
},
'required': ['param1']
}
}
}Use case: When the user shares tools with the AI using &toolname notation, this function generates the JSON schemas that tell the AI how to call those functions.
Solveit use of get_vars() and get_schema()
The $varname and &toolname Notation System
Overview
When you write a prompt in solveit containing $myvar or &mytool, the system: 1. Parses your prompt for these special notations 2. Extracts the referenced names 3. Retrieves their values/schemas from the Python namespace 4. Injects this information into the context sent to the AI
Variable Sharing: $varname
What Happens
User writes prompt:
┌─────────────────────────────────────────────┐
│ "Analyze the data in `$df` and tell me │
│ the average of the 'sales' column" │
└─────────────────────────────────────────────┘
│
▼
Parse for $-prefixed names
│
▼
Found: ['df']
│
▼
Call: shell.get_vars(['df'])
│
▼
Retrieve current value of df
│
▼
┌─────────────────────────────────────────────┐
│ Context sent to AI includes: │
│ │
│ <variables> │
│ df = DataFrame with 100 rows, columns: │
│ ['date', 'sales', 'region', ...] │
│ </variables> │
│ │
│ User prompt: "Analyze the data in df..." │
└─────────────────────────────────────────────┘
Key Behavior
- Current value: The variable’s value at the time the prompt is sent (not when it was first defined)
- Retroactive updates: If you change the variable and re-run, the context updates
- Safe representation: Uses
_safe_repr() to truncate large values (default 200 chars) - Literal conversion: Tries to preserve actual Python types when possible via
literal_eval
Example Flow
# Cell 1: Define data
df = pd.DataFrame({'a': [1,2,3], 'b': [4,5,6]})
# Cell 2: Prompt with $df
# "What's the sum of column 'a' in `$df`?"
# Behind the scenes:
shell.get_vars(['df'])
# Returns: {'df': " a b\n0 1 4\n1 2 5\n2 3 6"}
Tool Sharing: &toolname
What Happens
User writes prompt:
┌─────────────────────────────────────────────┐
│ "Use `&calculate` to add 15 and 27" │
└─────────────────────────────────────────────┘
│
▼
Parse for &-prefixed names
│
▼
Found: ['calculate']
│
▼
Call: shell.get_schemas(['calculate'])
│
▼
Generate JSON schema from function
│
▼
┌─────────────────────────────────────────────┐
│ AI receives tool definition: │
│ │
│ { │
│ "type": "function", │
│ "function": { │
│ "name": "calculate", │
│ "description": "Add two numbers", │
│ "parameters": { │
│ "type": "object", │
│ "properties": { │
│ "a": {"type": "integer"}, │
│ "b": {"type": "integer"} │
│ }, │
│ "required": ["a", "b"] │
│ } │
│ } │
│ } │
└─────────────────────────────────────────────┘
Requirements for Tools
From dialoghelper.core.is_usable_tool(), a function must have: 1. Type annotations for all parameters 2. A docstring (becomes the tool description)
# ✅ Valid tool
def calculate(a: int, b: int) -> int:
    "Add two numbers together"
    total = a + b
    return total
# ❌ Not usable - no docstring
def bad_tool(a: int, b: int) -> int:
return a + b
# ❌ Not usable - missing type hints
def another_bad(a, b):
"Add numbers"
return a + bTool Execution Flow
When the AI decides to use a tool:
┌─────────────────────────────────────────────┐
│ AI Response: │
│ "I'll use calculate to add those numbers" │
│ │
│ Tool Call: │
│ name: "calculate" │
│ arguments: {"a": 15, "b": 27} │
└─────────────────────────────────────────────┘
│
▼
solveit receives tool call
│
▼
Look up 'calculate' in namespace
│
▼
Execute: calculate(a=15, b=27)
│
▼
Result: 42
│
▼
┌─────────────────────────────────────────────┐
│ Tool result sent back to AI │
│ │
│ AI continues: "The sum of 15 and 27 is 42" │
└─────────────────────────────────────────────┘
The Parsing Mechanism
The parsing likely uses regex to find these patterns:
import re
def parse_prompt(prompt):
    "Return the `$var` names and `&tool` names referenced in backticks in `prompt`."
    def _refs(sigil):
        # References appear as `$name` / `&name`, always wrapped in backticks.
        return re.findall(rf'`{sigil}(\w+)`', prompt)
    return _refs(r'\$'), _refs('&')
# Example
prompt = "Use `&calculate` on `$x` and `$y`"
parse_prompt(prompt)
# Returns: (['x', 'y'], ['calculate'])
Complete Architecture
┌─────────────────────────────────────────────────────────────────────┐
│ USER WRITES PROMPT │
│ "Use `&my_tool` to process `$my_data` and return the result" │
└─────────────────────────────────────────────────────────────────────┘
│
▼
┌─────────────────────────────────────────────────────────────────────┐
│ SOLVEIT FRONTEND │
│ 1. Parse prompt for `$name` and `&name` patterns │
│ 2. Extract: vars=['my_data'], tools=['my_tool'] │
│ 3. Request from kernel: get_vars(), get_schemas() │
└─────────────────────────────────────────────────────────────────────┘
│
▼
┌─────────────────────────────────────────────────────────────────────┐
│ IPYKERNEL (Python) │
│ shell.get_vars(['my_data']) │
│ → {'my_data': [1, 2, 3, 4, 5]} │
│ │
│ shell.get_schemas(['my_tool']) │
│ → {'my_tool': {'type': 'function', 'function': {...}}} │
└─────────────────────────────────────────────────────────────────────┘
│
▼
┌─────────────────────────────────────────────────────────────────────┐
│ CONTEXT ASSEMBLY │
│ <variables> │
│ my_data = [1, 2, 3, 4, 5] │
│ </variables> │
│ │
│ Tools: [my_tool schema] │
│ │
│ Prompt: "Use my_tool to process my_data..." │
└─────────────────────────────────────────────────────────────────────┘
│
▼
┌─────────────────────────────────────────────────────────────────────┐
│ AI (Claude/etc) │
│ - Sees variable values in context │
│ - Has tool schemas available │
│ - Can call tools, results executed in user's kernel │
└─────────────────────────────────────────────────────────────────────┘
Key Points
| Aspect | $varname |
&toolname |
|---|---|---|
| Purpose | Share variable value | Share function as callable tool |
| Retrieval | get_vars() |
get_schemas() |
| What AI sees | Current value (as string/literal) | JSON schema for tool calling |
| Requirements | Variable must exist | Function needs types + docstring |
| AI can modify? | No (read-only snapshot) | Yes (can call the function) |
Practical Tips
- Variables are snapshots: The AI sees the value when you send the prompt
- Tools execute in your kernel: Side effects happen in your session
- Use backticks: The notation is
`$var` and `&tool`, not bare $var - Check tool validity: Use
is_usable_tool(func)to verify a function can be used as a tool - List tools: Use
mk_toollist(symbols)from dialoghelper to see available tools formatted for prompts
Customize Jupyterlab default behavior to enhance variables display
Let me explain how JupyterLab’s Debugger panel works and whether you could hook into it.
How JupyterLab’s Debugger Panel Gets Variables
The Debugger panel in JupyterLab uses a completely different mechanism than these ipykernel_helper functions: 1. Debug Adapter Protocol (DAP) - JupyterLab’s debugger communicates via the DAP standard 2. debugpy - The Python debugger backend 3. Kernel debug messages - Special debug_request/debug_reply messages over ZMQ The flow looks like:
JupyterLab Debugger UI
│
▼ (DAP protocol)
debugpy
│
▼ (inspects frames)
Python runtime
The Challenge
The user_items(), get_vars(), get_schemas() functions are: - Pull-based: Called on demand by the solveit frontend - Custom protocol: Use transient display messages or custom endpoints - Namespace-focused: Look at InteractiveShell.user_ns
JupyterLab’s Debugger is: - DAP-based: Uses standardized debug adapter protocol - Frame-focused: Inspects stack frames, not just the namespace - debugpy-controlled: The variable inspection happens inside debugpy
Option: Custom Variable Inspector Extension
Instead of modifying the Debugger, create a JupyterLab extension that: - Sends custom comm messages to the kernel - Calls user_items() or similar - Displays results in a custom panel
# Kernel side
from ipykernel.comm import Comm
def send_variables():
shell = get_ipython()
user_vars, user_fns = shell.user_items()
comm = Comm(target_name='variable_inspector')
comm.send({'vars': user_vars, 'fns': user_fns})
Practical Recommendation
The easiest approach that gives you similar functionality to solveit’s variable display in standard JupyterLab:
# In your notebook or startup file
from IPython import get_ipython
from IPython.display import display, HTML
import json
def show_user_items(max_len=100):
    """Display user variables and functions like solveit does"""
    shell = get_ipython()
    user_vars, user_fns = shell.user_items(max_len=max_len)
    # Assemble each collapsible section as a list of fragments, joined once.
    pieces = ["<details open><summary><b>Variables</b></summary><ul>"]
    pieces += [f"<li><code>{k}</code>: {v}</li>" for k, v in user_vars.items()]
    pieces.append("</ul></details>")
    pieces.append("<details open><summary><b>Functions</b></summary><ul>")
    pieces += [f"<li><code>{k}{v}</code></li>" for k, v in user_fns.items()]
    pieces.append("</ul></details>")
    display(HTML("".join(pieces)))
# Auto-show after each cell (optional)
# get_ipython().events.register('post_run_cell', lambda r: show_user_items())
Provide smarter code completion and signature help
Why These Are Better Than Default IPython
Default IPython completion: - No custom ranking (alphabetical or frequency-based) - Includes all dunder methods always - Doesn’t prioritize user code
These custom versions: - Smart ranking: user code > builtins > everything else - Filters dunder unless explicitly requested - Context-aware (knows about your namespace) - Provides structured data for rich UI display
InteractiveShell.ranked_complete(self:InteractiveShell, code, line_no=None, col_no=None)Purpose: Provide intelligent, ranked code completions using Jedi, with custom ranking logic.
Parameters: - code: The code string to complete - line_no: Optional line number (1-indexed) - col_no: Optional column number (1-indexed)
Returns: List of completion objects with these attributes: - text - The completion text - type - Type of completion (param, function, module, etc.) - mod - Module where the item is defined - rank - Numeric rank (lower = higher priority)
Ranking logic (lower is better): - Rank 1: Parameters (function arguments being filled) - Rank 2: Local variables/functions (from main) - Rank 3: Module members (when completing module.something) - Rank 4: Builtins (like print, len, etc.) - Rank 5: Everything else (imported modules, third-party) - Rank +0.1: Private items (starting with _) - slightly lower priority
Special handling: - Filters out dunder methods unless the user explicitly types __ (so they’re not cluttering normal completion) - Deprioritizes private _methods slightly but doesn’t remove them
Use case: Provide better autocomplete suggestions in the solveit dialog environment, prioritizing user-defined items and parameters over stdlib/third-party items.
InteractiveShell.sig_help(self:InteractiveShell, code, line_no=None, col_no=None)Purpose: Get function signature information at the cursor position (like when you type func( and want to see the parameters).
Parameters: - code The code string - line_no Line number where cursor is (1-indexed) - col_no Column number where cursor is (1-indexed)
Returns: List of signature objects, each containing: - label - Full signature description (e.g., “print(value, …, sep=’ ‘, end=’’)”) - typ - Type of callable (function, method, class, etc.) - mod - Module name where it’s defined - doc - Full docstring - idx - Current parameter index (which parameter the cursor is on) - params - List of parameter dicts with name and desc
How it works: - Uses Jedi’s Interpreter with the current namespace to get context-aware signatures - Falls back to Script (static analysis) if Interpreter doesn’t find anything - Extracts detailed information about each signature - Returns structured data about parameters and documentation
Use case: Power the signature help tooltip in the solveit editor, showing: - What parameters a function takes - Which parameter you’re currently typing - Documentation for each parameter - Full docstring
Inspector._get_info(self:Inspector, obj, oname='', formatter=None, info=None, detail_level=0, omit_sections=())Purpose: Customizes the ?? (double question mark) output to display source code as formatted Markdown.
Note: This is patched onto Inspector, not InteractiveShell.
Parameters: - obj: The object being inspected - oname Object name (string) - formatter: Optional formatter - info: Pre-computed info dict - detail_level: 0 = basic (?), 1 = detailed (??) - omit_sections: Sections to skip
How it works: - Calls the original _get_info method first (stored as _orig__get_info) - If detail_level == 0 (single ?), returns original output unchanged - If detail_level == 1 (double ??), creates enhanced Markdown output: - Source code in a Python fenced code block - File path in bold with backticks
Use case: Makes the ?? inspection output much more readable in environments that support Markdown rendering
Override IPython’s default behavior to enhance completions
JupyterLab communicates with the kernel via the Jupyter messaging protocol:
┌─────────────────┐ ┌─────────────────┐
│ JupyterLab │ complete_request │ IPython │
│ Frontend │ ─────────────────► │ Kernel │
│ │ │ │
│ │ ◄───────────────── │ │
│ │ complete_reply │ │
└─────────────────┘ └─────────────────┘
The relevant message types are: - complete_request / complete_reply - for autocompletion - inspect_request / inspect_reply - for signature/documentation help
Patch do_complete on the Kernel
The kernel’s do_complete method handles complete_request messages. You can patch it:
from ipykernel.ipkernel import IPythonKernel
from functools import wraps
# Store original
_original_do_complete = IPythonKernel.do_complete
@wraps(_original_do_complete)
def custom_do_complete(self, code, cursor_pos):
    """Enhanced completion with ranking.

    Example sketch: wraps the kernel's stock `do_complete` handler, gets
    the normal completion reply, hides dunder names unless the user typed
    `__`, then re-orders matches with a custom `rank` function (left as a
    `...` stub here).
    """
    # Get original reply from the unpatched handler.
    reply = _original_do_complete(self, code, cursor_pos)
    if reply['status'] == 'ok':
        matches = reply['matches']
        # Custom ranking logic (placeholder -- a real implementation must
        # return a comparable value for `sorted` below to work).
        def rank(name):
            ...
        # Filter __dunder__ unless explicitly typing them: look at the last
        # whitespace-separated token before the cursor.
        text = code[:cursor_pos].split()[-1] if code[:cursor_pos].split() else ''
        if '__' not in text:
            matches = [m for m in matches if not m.startswith('__')]
        # Sort by rank
        reply['matches'] = sorted(matches, key=rank)
    return reply
# Apply patch
IPythonKernel.do_complete = custom_do_complete
Patch do_inspect for Signature Help
For enhanced signature/documentation help:
from ipykernel.ipkernel import IPythonKernel
from jedi import Interpreter
from functools import wraps
_original_do_inspect = IPythonKernel.do_inspect
@wraps(_original_do_inspect)
def custom_do_inspect(self, code, cursor_pos, detail_level=0):
    """Enhanced inspect with Jedi signatures.

    Example sketch: try to answer an inspect request with richer,
    Jedi-derived signature info first; on any failure fall back to the
    kernel's original `do_inspect`. The Jedi lookup itself is elided
    (`...`), so `text` and `sig` below are placeholders a real
    implementation would define.
    """
    # Try Jedi first for better signature info
    try:
        ns = self.shell.user_ns
        # Get signatures at cursor (elided; presumably via jedi.Interpreter
        # with `ns` -- confirm against the imports above)
        ...
        return {
            'status': 'ok',
            'found': True,
            'data': {'text/markdown': text, 'text/plain': sig.docstring()},
            'metadata': {}
        }
    except Exception:
        # Broad catch is deliberate: any Jedi problem falls through to the
        # stock handler rather than breaking inspection.
        pass
    # Fall back to original
    return _original_do_inspect(self, code, cursor_pos, detail_level)
IPythonKernel.do_inspect = custom_do_inspect
Create an IPython Extension
Package everything as a proper IPython extension:
def load_ipython_extension(ip):
    """Called when extension is loaded via %load_ext.

    Example sketch: patches `IPythonKernel.do_complete` with a wrapper
    (body elided as `...`) so completions can be customized kernel-wide.
    """
    from ipykernel.ipkernel import IPythonKernel
    from functools import wraps
    # Patch do_complete, keeping a handle on the original so the wrapper
    # (and a potential unload) can delegate back to it.
    _orig = IPythonKernel.do_complete
    @wraps(_orig)
    def patched_do_complete(self, code, cursor_pos):
        ...
    IPythonKernel.do_complete = patched_do_complete
    print("Enhanced completions loaded!")
def unload_ipython_extension(ip):
"""Called when extension is unloaded"""
# Restore original if needed
pass
Usage:
%load_ext my_completer_extension
Or add to ipython_config.py:
c.InteractiveShellApp.extensions = ['my_completer_extension']
Programmatically inject variables into the user namespace
InteractiveShell.xpush(self:InteractiveShell, interactive=False, **kw):
"Like `push`, but with kwargs"
self.push(kw, interactive=interactive)Purpose: A convenience wrapper around InteractiveShell.push() that accepts keyword arguments instead of a dictionary.
Parameters: - interactive: If True, the variables are treated as if typed interactively (affects display) - **kw: Variables to inject as keyword arguments
Comparison:
shell = get_ipython()
# Standard push() - requires a dictionary
shell.push({'x': 42, 'name': 'Alice'})
# xpush() - cleaner kwargs syntax
shell.xpush(x=42, name='Alice')Use case: Cleaner API when programmatically injecting variables into the user namespace.
What InteractiveShell.push() does
Purpose: Programmatically add variables to the user’s namespace - as if the user had typed them directly in the notebook.
The interactive Parameter controls how the variables are treated: - interactive=True (default): - Variables treated as if user typed them - Subject to display hooks (e.g., last expression displays) - Triggers post_execute hooks - Can show output - interactive=False: - Variables injected silently - No display output - Minimal side effects - Used for “behind the scenes” injection
Use Cases in solveit
- Tool Results Injection
When the AI calls a tool, the result needs to be available to the user:
# AI calls a tool: calculate(5, 3)
def calculate(a: int, b: int) -> int:
return a + b
result = calculate(5, 3) # Tool executes
# Inject result into user namespace
shell.xpush(tool_result=result, interactive=False)
# User can now access it:
print(tool_result) # 8Where: claudette.toolloop / cosette.toolloop / lisette.core
File & Function:
# claudette/toolloop.py
@patch
def toolloop(self:Chat, pr, max_steps=10, ...):
# After tool execution, results are automatically added to chat history
# The tool result becomes part of the conversation contextImplementation: Tool results are typically kept in the chat history rather than pushed to namespace. However, if you wanted to inject them:
# dialoghelper/core.py (hypothetical)
def execute_tool_and_inject(tool_name, args, ns):
result = call_func(tool_name, args, ns)
shell = get_ipython()
shell.xpush(**{f'{tool_name}_result': result}, interactive=False)
return result

Actual location: Not directly implemented in the libraries we explored - this would be custom solveit backend code.
- Loading Context from External Sources
When importing from a gist or URL:
# User runs: import_gist('abc123')
# Behind the scenes:
gist_code = fetch_gist('abc123')
exec(gist_code, globals_dict := {})
# Inject all functions/classes from gist
shell.xpush(interactive=False, **globals_dict)
# Now user has access to everything from the gist

Where: dialoghelper.core
File & Function:
# dialoghelper/core.py
def import_gist(gist_id:str, mod_name:str='gist', run:bool=False)

Key line: shell.user_ns[k] = v - directly modifies namespace (equivalent to push)
- AI-Generated Variables
When AI generates code that creates variables:
# AI generates this code:
code = """
import pandas as pd
df = pd.DataFrame({'a': [1,2,3], 'b': [4,5,6]})
"""
# Execute and capture namespace
exec_namespace = {}
exec(code, exec_namespace)
# Push the dataframe to user
shell.xpush(df=exec_namespace['df'])
# User now has 'df' available
df.head()

Where: execnb.shell / toolslm.shell
File & Function:
# toolslm/shell.py
def get_shell():
"Get a `TerminalInteractiveShell` with minimal functionality"
# Shell maintains its own namespace
# Variables from executed code live in shell.user_ns

# execnb/shell.py (from execnb library)
class CaptureShell:
def run(self, code):
# Executes code and captures outputs
# Variables stay in shell's namespace

For injecting into main namespace: Custom solveit code would do:
shell = get_ipython()
shell.xpush(**exec_namespace, interactive=False)

- Sharing Variables Between Messages
When running code in one message and needing results in another:
# Message 1 (code cell):
x = expensive_computation()
# Behind the scenes, solveit might:
shell.xpush(_last_result=x, interactive=False)
# Message 2 (prompt to AI):
# AI can reference _last_result

Where: dialoghelper.core
File & Function:
# dialoghelper/core.py
def add_msg(content, placement='afterCurrent', msgid=None, msg_type='note', ...):
"Add a message to dialog"
# Messages can contain code that executes
# Results are automatically in namespace
def run_msg(msgid, dname=None):
"Queue a message for execution"
# Executes code cell, results go to namespace automatically

Implementation: When code cells execute in solveit, they naturally share the same namespace. No explicit push() needed - it’s the default behavior.
- Pre-loading Helper Functions
When starting a dialog, inject helper utilities:
# On dialog start
from dialoghelper import read_msg, add_msg, update_msg
shell.xpush(
read_msg=read_msg,
add_msg=add_msg,
update_msg=update_msg,
interactive=False
)
# Now available everywhere in the dialog

Where: ipykernel_helper.core
File & Function:
# ipykernel_helper/core.py
def load_ipython_extension(ip):
"Load extension and inject helper functions"
from ipykernel_helper import transient, run_cmd
ns = ip.user_ns
ns['read_url'] = read_url
ns['transient'] = transient
ns['run_cmd'] = run_cmd

Key line: ns['read_url'] = read_url - directly modifies namespace dictionary
Also:
# dialoghelper/core.py (hypothetical startup)
def initialize_dialog_helpers():
shell = get_ipython()
shell.xpush(
read_msg=read_msg,
add_msg=add_msg,
update_msg=update_msg,
find_msgs=find_msgs,
interactive=False
)

- Restoring Session State
When loading a saved dialog:
# Load saved state
saved_vars = load_dialog_state('dialog_123')
# saved_vars = {'x': 42, 'data': [...], 'model': <object>}
# Restore to namespace
shell.xpush(interactive=False, **saved_vars)
# User's variables are back

Where: Not explicitly implemented in the libraries we explored
Would be in: Custom solveit backend (not open source)
Hypothetical implementation:
# solveit/session.py (hypothetical)
def restore_dialog_state(dialog_id):
"Restore variables from saved dialog"
import pickle
# Load saved state
with open(f'dialogs/{dialog_id}/state.pkl', 'rb') as f:
saved_vars = pickle.load(f)
# Restore to namespace
shell = get_ipython()
shell.xpush(interactive=False, **saved_vars)
return saved_vars

Key Insight
Most of these behaviors don’t explicitly call push() or xpush() - they use alternative approaches:
- Direct namespace modification:
shell.user_ns['key'] = value

- Natural code execution: Code cells share namespace automatically
- Extension loading: Inject at startup via
load_ipython_extension()
The xpush() convenience wrapper is available but many implementations use the underlying mechanisms directly. The actual solveit backend (which orchestrates dialogs, AI interactions, and message execution) likely uses xpush() more extensively, but that code isn’t in the open-source libraries we explored.
Architecture: How solveit Uses push()
┌─────────────────────────────────────────────────────────────────────┐
│ USER ACTION │
│ • Runs a tool │
│ • Imports a gist │
│ • AI generates code │
│ • Loads dialog state │
└────────────────────────────┬────────────────────────────────────────┘
│
▼
┌─────────────────────────────────────────────────────────────────────┐
│ SOLVEIT BACKEND │
│ • Executes code in isolated namespace │
│ • Captures results │
│ • Determines what to share with user │
└────────────────────────────┬────────────────────────────────────────┘
│
▼
┌─────────────────────────────────────────────────────────────────────┐
│ shell.xpush() │
│ • Injects variables into user namespace │
│ • Makes results accessible in subsequent cells │
└────────────────────────────┬────────────────────────────────────────┘
│
▼
┌─────────────────────────────────────────────────────────────────────┐
│ USER NAMESPACE │
│ Variables now available: │
│ • print(tool_result) │
│ • df.head() │
│ • my_imported_function() │
└─────────────────────────────────────────────────────────────────────┘
Example: Complete Tool Execution Flow
# 1. User shares a tool with AI
def fetch_data(url: str) -> dict:
"""Fetch JSON from URL"""
import httpx
return httpx.get(url).json()
# 2. AI decides to call it
# Behind the scenes in solveit:
tool_name = "fetch_data"
tool_args = {"url": "https://api.github.com/users/torvalds"}
# 3. Execute tool
from toolslm.funccall import call_func
result = call_func(tool_name, tool_args, namespace={'fetch_data': fetch_data})
# 4. Inject result into user namespace
shell = get_ipython()
shell.xpush(
github_data=result, # Make result accessible
interactive=False # Silent injection
)
# 5. User can now use it:
print(github_data['name']) # 'Linus Torvalds'

Why Not Just Execute Directly?
You might wonder: why use push() instead of just executing code directly?
Problems: - Harder to control scope - Difficult to inject complex objects - No clean separation between execution and namespace injection - Can’t easily inject from external sources
Benefits of using push: - Clean API for namespace manipulation - Works with any Python object (including unpicklable ones) - Respects IPython’s namespace management - Can control interactive behavior - Integrates with IPython’s hooks and events
Summary
| Feature | Purpose in solveit |
|---|---|
| Tool results | Make AI tool outputs available to user |
| Gist imports | Inject functions from GitHub gists |
| AI code execution | Share variables from AI-generated code |
| Session restoration | Reload saved dialog state |
| Helper injection | Pre-load utility functions |
| Context sharing | Pass data between messages/cells |
| Use Case | Library | File | Function/Method |
|---|---|---|---|
| 1. Tool Results | (custom solveit) | N/A | Not in open source libs |
| 2. Gist Imports | dialoghelper |
core.py |
import_gist() |
| 3. AI-Generated Vars | execnb / toolslm |
shell.py |
CaptureShell.run() / get_shell() |
| 4. Message Variables | dialoghelper |
core.py |
run_msg(), add_msg() |
| 5. Helper Pre-loading | ipykernel_helper |
core.py |
load_ipython_extension() |
| 6. Session Restoration | (custom solveit) | N/A | Not in open source libs |
Key insight: push() / xpush() is the bridge between solveit’s backend execution and the user’s interactive namespace. It’s how results from AI actions become available for the user to work with.
Send commands and display data to the Javascript frontend
InteractiveShell.run_cmd(cmd, data='', meta=None, update=False, **kw)

Purpose: a convenience wrapper that sends a command to the frontend via the transient mechanism.
Internally using: InteractiveShell.transient().
In Jupyter, when you output something, it gets sent to the frontend via a display_data or execute_result message. These messages have three main parts:
{
"data": {"text/plain": "Hello", "text/html": "<b>Hello</b>"}, # The content
"metadata": {}, # Extra info about the content
"transient": {} # Data that should NOT be persisted in the notebook
}

Sends display data to the frontend where the actual payload is in the transient field, not the main data field.
The key insight: transient data is displayed but not saved when the notebook is saved. It’s ephemeral.
Why use transient? - Not saved to notebook - Commands and temporary UI updates don’t clutter the saved .ipynb file - Custom frontend communication - The solveit frontend watches for specific transient keys - Ephemeral state - Progress indicators, status updates, commands that shouldn’t persist
Example usage: run_cmd("scroll_to", msg_id="abc123")
InteractiveShell.publish(self:InteractiveShell, data='', subtype='plain', mimetype='text', meta=None, update=False, **kw)
InteractiveShell.transient(data='', subtype='plain', mimetype='text', meta=None, update=False, **kw)

Purpose: A flexible method to publish display data to the frontend, with support for transient data.
Parameters: - data: Content to display (string, DisplayObject, or dict) - subtype: MIME subtype (default: ‘plain’) - mimetype: MIME type (default: ‘text’) → combined as text/plain - meta: Metadata dictionary - update: If True, updates a previous display with the same display_id - **kw: Extra kwargs go into the transient field
How it works: - If data is a DisplayObject (like HTML, Markdown), it formats it properly - If data is not a dict/mapping, it wraps it as {mimetype/subtype: data} - Publishes via display_pub.publish() with transient data in **kw
Examples:
shell = get_ipython()
# Publish plain text
shell.publish("Hello, world!")
# Publish HTML
shell.publish("<b>Bold text</b>", subtype='html')
# Publish with transient data (for frontend commands)
shell.publish("status", cmd="update_status", id="123")
# Publish a DisplayObject
from IPython.display import HTML
shell.publish(HTML("<h1>Title</h1>"))
# Update an existing display
shell.publish("Updated content", update=True, display_id="my_display")

publish() is the lower-level method that transient() essentially wraps.
Web access
I have access to two main tools:
- Web Search - I can search for current information, recent events, technical documentation, news, and anything where up-to-date data would be helpful. I use this when:
- You need recent information (after my March 2025 knowledge cutoff)
- You’re looking for specific facts or current conditions
- Real-time data is important (weather, news, stock prices, etc.)
- Read URL - I can fetch and read content from specific web pages you provide, which is useful for analyzing articles, documentation, or other online content.
I use these tools strategically - I don’t search for things I already know well (like general programming concepts, historical facts, or established knowledge), but I will search when current or specific information would genuinely help answer your question.
Model inference service
The web_search function isn’t defined in your Python environment - it’s a tool that’s available to me (the AI assistant) but not directly accessible to you in your Python code.
When I use web_search, I’m calling it through my own tool-calling mechanism, not through your Python interpreter. It’s part of my capabilities, separate from the Python environment you’re working in.
web_search(query: str)

Parameters - Takes a search query string and returns web search results
ipykernel_helper
Full-featured URL reader that can extract sections, convert to markdown, handle math (KaTeX/MathJax), absolutify image URLs, and optionally tag images for AI processing
read_url(
url: str,
as_md: bool = True,
extract_section: bool = True,
selector: str = None
)

Parameters - url: The URL to read (required) - as_md: Whether to convert HTML to Markdown (default: True) - extract_section: If URL has an anchor, return only that section (default: True) - selector: CSS selector to extract specific sections using BeautifulSoup.select syntax (optional)
Uses internally - scrape_url() - Fetch URL content using cloudscraper (handles anti-bot protections) - get_md() - Convert HTML to clean markdown