mirror of
https://github.com/atuinsh/atuin.git
synced 2026-03-21 11:32:32 +01:00
## Summary
This PR adds a persistent, in-memory search index to the Atuin daemon,
enabling fast fuzzy search without the startup delay of building an
index each time the TUI opens.
### Key Changes
- **Daemon search service**: A new gRPC service that maintains a Nucleo
fuzzy search index in memory
- **Real-time index updates**: The daemon listens for history events
(new commands, synced records) and updates the index immediately
- **Filter mode support**: All existing filter modes work (Global, Host,
Session, Directory, Workspace)
- **New search engine**: `daemon-fuzzy` search mode that queries the
daemon instead of building a local index
- **Paged history loading**: Database pagination support for efficient
initial index loading
- **Configurable logging**: New `[logs]` settings section for daemon and
search log configuration
- **Component-based daemon architecture**: Refactored daemon internals
into a modular, event-driven system
- **Fallback to DB search for regex**: Since Nucleo doesn't support
regex matching
## Daemon Architecture
The daemon has been refactored to use a component-based, event-driven
architecture that makes it easier to add new functionality and reason
about the system.
### Core Concepts
```
┌─────────────────────────────────────────────────────────────────────────────┐
│ Atuin Daemon │
│ │
│ ┌─────────────┐ ┌──────────────────────────────────────────────────┐ │
│ │ Daemon │ │ Components │ │
│ │ Handle │────▶│ │ │
│ │ │ │ ┌─────────────┐ ┌─────────────┐ ┌────────────┐ │ │
│ │ • emit() │ │ │ History │ │ Search │ │ Sync │ │ │
│ │ • subscribe │ │ │ Component │ │ Component │ │ Component │ │ │
│ │ • settings │ │ │ │ │ │ │ │ │ │
│ │ • databases │ │ │ gRPC service│ │ gRPC service│ │ background │ │ │
│ └─────────────┘ │ │ WIP history │ │ Nucleo index│ │ sync │ │ │
│ │ │ └─────────────┘ └─────────────┘ └────────────┘ │ │
│ │ └──────────────────────────────────────────────────┘ │
│ │ ▲ │
│ ▼ │ │
│ ┌─────────────────────────────────────┴────────────────────────────────┐ │
│ │ Event Bus (broadcast) │ │
│ │ │ │
│ │ HistoryStarted │ HistoryEnded │ RecordsAdded │ SyncCompleted │ ... │ │
│ └──────────────────────────────────────────────────────────────────────┘ │
│ ▲ │
│ ┌───────────────────────────────────┴──────────────────────────────────┐ │
│ │ Control Service (gRPC) │ │
│ │ External event injection from CLI commands │ │
│ └──────────────────────────────────────────────────────────────────────┘ │
└─────────────────────────────────────────────────────────────────────────────┘
```
### DaemonHandle
A lightweight, cloneable handle that provides access to shared daemon
resources:
- **Event emission**: `handle.emit(DaemonEvent::...)` broadcasts to all
components
- **Event subscription**: `handle.subscribe()` returns a receiver for
the event bus
- **Settings**: `handle.settings()` for configuration access
- **Databases**: `handle.history_db()` and `handle.store()` for data
access
### Component Trait
Components implement a simple lifecycle:
```rust
#[async_trait]
trait Component: Send + Sync {
fn name(&self) -> &'static str;
async fn start(&mut self, handle: DaemonHandle) -> Result<()>;
async fn handle_event(&mut self, event: &DaemonEvent) -> Result<()>;
async fn stop(&mut self) -> Result<()>;
}
```
### Event-Driven Design
Components communicate via events rather than direct coupling:
| Event | Emitted By | Consumed By |
|-------|-----------|-------------|
| `HistoryStarted` | History gRPC | Search (logging) |
| `HistoryEnded` | History gRPC | Search (index update) |
| `RecordsAdded` | Sync | Search (index update) |
| `HistoryPruned` | CLI (via Control) | Search (index rebuild) |
| `HistoryDeleted` | CLI (via Control) | Search (index rebuild) |
| `ForceSync` | CLI (via Control) | Sync |
| `ShutdownRequested` | Signal handler | All (graceful shutdown) |
### External Event Injection
CLI commands can inject events into a running daemon:
```rust
// After `atuin history prune`
emit_event(DaemonEvent::HistoryPruned).await?;
// After deleting specific items
emit_event(DaemonEvent::HistoryDeleted { ids }).await?;
// Request immediate sync
emit_event(DaemonEvent::ForceSync).await?;
```
This ensures the daemon's search index stays in sync with database
changes made by CLI commands.
## Search Architecture
The search service uses a [forked version of
Nucleo](https://github.com/atuinsh/nucleo-ext) that adds filter and
scorer callbacks, enabling efficient filtering and frecency-based
ranking.
```
┌────────────────────────────────────────────────────────────────┐
│ Atuin Daemon │
│ │
│ ┌─────────────────┐ ┌──────────────────────────────────┐ │
│ │ Event System │───▶│ Search Component │ │
│ │ │ │ │ │
│ │ • RecordsAdded │ │ ┌────────────────────────────┐ │ │
│ │ • HistoryEnded │ │ │ Deduplicated Index │ │ │
│ │ • HistoryPruned │ │ │ │ │ │
│ └─────────────────┘ │ │ CommandData per command: │ │ │
│ │ │ • Global frecency │ │ │
│ ┌─────────────────┐ │ │ • Filter indexes (sets) │ │ │
│ │ Background Task │ │ │ • Invocation history │ │ │
│ │ │ │ └────────────────────────────┘ │ │
│ │ Rebuilds │ │ │ │ │
│ │ frecency map │ │ ▼ │ │
│ │ every 60s │───▶│ ┌────────────────────────────┐ │ │
│ └─────────────────┘ │ │ Nucleo (forked) │ │ │
│ │ │ │ │ │
│ │ │ • Filter callback │ │ │
│ │ │ • Scorer callback │ │ │
│ │ │ • Fuzzy matching │ │ │
│ │ └────────────────────────────┘ │ │
│ └──────────────────────────────────┘ │
│ │ │
│ │ gRPC (Unix socket) │
└──────────────────────────────────────│─────────────────────────┘
│
▼
┌─────────────────────────────────────────────────────────────────┐
│ Search TUI (Client) │
│ │
│ 1. Send query + filter mode + context to daemon │
│ 2. Receive matching history IDs (ranked by frecency) │
│ 3. Hydrate full records from local SQLite database │
│ 4. Display results in TUI │
└─────────────────────────────────────────────────────────────────┘
```
### Nucleo Fork
The [nucleo-ext fork](https://github.com/atuinsh/nucleo-ext) adds two
key features to Nucleo:
1. **Filter callback**: Pre-filter items before fuzzy matching (used for
directory/host/session filtering)
2. **Scorer callback**: Compute custom scores after matching (used for
frecency ranking)
```rust
// Filter: only include commands run in current directory
nucleo.set_filter(Some(Arc::new(|cmd: &String| {
passing_commands.contains(cmd)
})));
// Scorer: combine fuzzy score with frecency
nucleo.set_scorer(Some(Arc::new(|cmd: &String, fuzzy_score: u32| {
let frecency = frecency_map.get(cmd).copied().unwrap_or(0);
fuzzy_score + (frecency * 10)
})));
```
### Deduplicated Index
Commands are stored once per unique command text, with metadata tracking
all invocations:
```rust
struct CommandData {
command: String,
invocations: Vec<Invocation>, // All times this command was run
global_frecency: FrecencyData, // Precomputed frecency score
// O(1) filter indexes
directories: HashSet<String>, // All cwds where command was run
hosts: HashSet<String>, // All hostnames
sessions: HashSet<String>, // All session IDs
}
```
This deduplication means:
- **Fewer items to match**: ~13K unique commands vs ~62K history entries
- **O(1) filter checks**: HashSet lookups instead of scanning
invocations
- **Single frecency score**: Global frecency computed once, used for all
filter modes
### Frecency Scoring
Frecency (frequency + recency) scoring prioritizes recently and
frequently used commands:
```rust
fn compute_frecency(count: u32, last_used: i64, now: i64) -> u32 {
let age_hours = (now - last_used) / 3600;
// Recency: decays over time (half-life ~24 hours)
let recency = (100.0 * (-age_hours as f64 / 24.0).exp()) as u32;
// Frequency: logarithmic scaling
let frequency = ((count as f64).ln() * 20.0).min(100.0) as u32;
recency + frequency
}
```
The frecency map is:
- **Precomputed by background task** every 60 seconds
- **Never computed inline** during search (no latency impact)
- **Graceful fallback**: If unavailable, search works without frecency
ranking
### Filter Mode Implementation
| Filter Mode | Implementation |
|-------------|----------------|
| Global | No filter (all commands) |
| Directory | `command.directories.contains(cwd)` |
| Workspace | `command.directories.any(\|d\| d.starts_with(git_root))` |
| Host | `command.hosts.contains(hostname)` |
| Session | `command.sessions.contains(session_id)` |
Filters are pre-computed into a HashSet before the search, making the
filter callback O(1).
### Search Flow
1. **Daemon startup**: Loads history from SQLite in pages, builds
deduplicated index
2. **Frecency precompute**: Background task builds frecency map after
history loads
3. **Search request**: Client sends query with filter mode and context
4. **Filter**: Pre-computed HashSet determines which commands pass the
filter
5. **Match**: Nucleo fuzzy matches the query against command text
6. **Score**: Frecency scorer ranks results (fuzzy score + frecency *
10)
7. **Response**: Returns history IDs for the most recent invocation of
each matching command
8. **Hydration**: Client fetches full records from local SQLite
### Configuration
```toml
# Enable daemon + autostart
[daemon]
enabled = true
autostart = true
# Enable daemon-based fuzzy search
[search]
search_mode = "daemon-fuzzy"
```
## Performance
Performance varies based on several factors, but most initial testing
with the new architecture shows improvement:
* **Nucleo performs searches up to 4.5x faster**: direct DB search
averages 18.07ms, but the daemon completes the same queries in 3.99ms.
* **IPC overhead is significant, but acceptable**: a significant amount
of wall-time is taken up by the transfer of data over IPC (via UDS in
this case). This averages to about ~7.8ms and accounts for 66% of
client-side wall time.
* **Tail latency improves at every layer**: p99 times correspond to
initial requests, worst-case query patterns, etc. but the average p99
daemon-based response time is 3.6x better than the associated DB-based
search p99 time.
* **Query complexity no longer impacts performance**: the Nucleo-based
search shows consistent 2-7ms times regardless of query pattern. The
DB-based search had a 17x variance (3.59ms to 62.46ms).
Interestingly, @ellie - who has a larger history store than I do - gets
even better performance on the IPC layer. This could use a lot more
testing in various edge cases and on various hardware, but seems
promising.
### Regular DB search
```
Individual calls for: db_search
--------------------------------------------------------------------------------------------------------------
# Wall Busy Idle Fields
--------------------------------------------------------------------------------------------------------------
1 32.25ms 32.20ms 47.70µs {"mode":"Fuzzy","query":"^"}
2 19.48ms 19.40ms 84.20µs {"mode":"Fuzzy","query":"^c"}
3 20.40ms 20.10ms 297.00µs {"mode":"Fuzzy","query":"^ca"}
4 13.07ms 13.00ms 69.90µs {"mode":"Fuzzy","query":"^car"}
5 12.17ms 12.10ms 67.10µs {"mode":"Fuzzy","query":"^carg"}
6 20.78ms 20.70ms 76.60µs {"mode":"Fuzzy","query":"^cargo"}
7 9.15ms 9.10ms 53.20µs {"mode":"Fuzzy","query":"^cargo "}
8 10.24ms 10.00ms 237.00µs {"mode":"Fuzzy","query":"^cargo b"}
9 10.01ms 9.68ms 325.00µs {"mode":"Fuzzy","query":"^cargo bu"}
10 5.89ms 5.83ms 57.20µs {"mode":"Fuzzy","query":"^cargo bui"}
11 8.85ms 8.28ms 568.00µs {"mode":"Fuzzy","query":"^cargo buil"}
12 7.70ms 7.49ms 212.00µs {"mode":"Fuzzy","query":"^cargo build"}
13 3.59ms 3.53ms 57.00µs {"mode":"Fuzzy","query":"^cargo build$"}
14 6.50ms 6.44ms 63.60µs {"mode":"Fuzzy","query":"^cargo "}
15 6.48ms 6.38ms 100.00µs {"mode":"Fuzzy","query":"!"}
16 31.68ms 31.60ms 75.90µs {"mode":"Fuzzy","query":"!g"}
17 62.46ms 62.40ms 58.90µs {"mode":"Fuzzy","query":"!gi"}
18 30.35ms 30.30ms 46.90µs {"mode":"Fuzzy","query":"!git"}
19 53.84ms 53.80ms 40.80µs {"mode":"Fuzzy","query":"!git "}
20 19.24ms 19.20ms 39.70µs {"mode":"Fuzzy","query":"!git c"}
21 22.03ms 22.00ms 34.70µs {"mode":"Fuzzy","query":"!git co"}
22 17.13ms 17.00ms 133.00µs {"mode":"Fuzzy","query":"!git com"}
23 16.14ms 15.90ms 242.00µs {"mode":"Fuzzy","query":"!git comm"}
24 5.11ms 5.08ms 28.60µs {"mode":"Fuzzy","query":"!git commi"}
25 7.31ms 7.26ms 52.70µs {"mode":"Fuzzy","query":"!git commit"}
Summary: 25 calls
Wall: avg=18.07ms, min=3.59ms, max=62.46ms, p50=13.07ms, p99=62.46ms
Busy: avg=17.95ms, min=3.53ms, max=62.40ms, p50=13.00ms, p99=62.40ms
```
### Daemon-based search
**Client**
```
Individual calls for: daemon_search
--------------------------------------------------------------------------------------------------------------
# Wall Busy Idle Fields
--------------------------------------------------------------------------------------------------------------
1 13.05ms 2.55ms 10.50ms {"query":"^"}
2 10.65ms 1.40ms 9.25ms {"query":"^c"}
3 10.72ms 1.18ms 9.54ms {"query":"^ca"}
4 5.54ms 485.00µs 5.06ms {"query":"^car"}
5 15.02ms 1.02ms 14.00ms {"query":"^carg"}
6 9.49ms 840.00µs 8.65ms {"query":"^cargo"}
7 5.53ms 555.00µs 4.97ms {"query":"^cargo "}
8 8.56ms 717.00µs 7.84ms {"query":"^cargo b"}
9 12.34ms 1.24ms 11.10ms {"query":"^cargo bu"}
10 8.38ms 650.00µs 7.73ms {"query":"^cargo bui"}
11 13.07ms 770.00µs 12.30ms {"query":"^cargo buil"}
12 17.11ms 709.00µs 16.40ms {"query":"^cargo build"}
13 15.41ms 907.00µs 14.50ms {"query":"^cargo build$"}
14 8.19ms 665.00µs 7.52ms {"query":"^cargo "}
15 7.98ms 1.72ms 6.26ms {"query":"!"}
16 13.56ms 856.00µs 12.70ms {"query":"!g"}
17 8.11ms 624.00µs 7.49ms {"query":"!gi"}
18 14.57ms 775.00µs 13.80ms {"query":"!git"}
19 14.18ms 779.00µs 13.40ms {"query":"!git "}
20 9.62ms 802.00µs 8.82ms {"query":"!git c"}
21 15.50ms 1.50ms 14.00ms {"query":"!git co"}
22 11.58ms 1.48ms 10.10ms {"query":"!git com"}
23 13.82ms 2.12ms 11.70ms {"query":"!git comm"}
24 17.48ms 2.18ms 15.30ms {"query":"!git commi"}
25 14.81ms 1.71ms 13.10ms {"query":"!git commit"}
Summary: 25 calls
Wall: avg=11.77ms, min=5.53ms, max=17.48ms, p50=12.34ms, p99=17.48ms
Busy: avg=1.13ms, min=485.00µs, max=2.55ms, p50=856.00µs, p99=2.55ms
```
**Daemon**
```
Individual calls for: daemon_search_query
--------------------------------------------------------------------------------------------------------------
# Wall Busy Idle Fields
--------------------------------------------------------------------------------------------------------------
1 1.75ms 250ns 1.75ms {"query":"^","query_id":1}
2 4.58ms 125ns 4.58ms {"query":"^c","query_id":2}
3 4.39ms 250ns 4.39ms {"query":"^ca","query_id":3}
4 2.52ms 125ns 2.52ms {"query":"^car","query_id":4}
5 4.44ms 250ns 4.44ms {"query":"^carg","query_id":5}
6 3.66ms 167ns 3.66ms {"query":"^cargo","query_id":6}
7 2.38ms 84ns 2.38ms {"query":"^cargo ","query_id":7}
8 4.13ms 84ns 4.13ms {"query":"^cargo b","query_id":8}
9 4.40ms 167ns 4.40ms {"query":"^cargo bu","query_id":9}
10 3.87ms 125ns 3.87ms {"query":"^cargo bui","query_id":10}
11 4.36ms 84ns 4.36ms {"query":"^cargo buil","query_id":11}
12 3.96ms 333ns 3.96ms {"query":"^cargo build","query_id":12}
13 4.61ms 167ns 4.61ms {"query":"^cargo build$","query_id":13}
14 4.20ms 209ns 4.20ms {"query":"^cargo ","query_id":14}
15 238.17µs 167ns 238.00µs {"query":"!","query_id":15}
16 4.44ms 125ns 4.44ms {"query":"!g","query_id":16}
17 3.47ms 83ns 3.47ms {"query":"!gi","query_id":17}
18 4.57ms 125ns 4.57ms {"query":"!git","query_id":18}
19 7.15ms 167ns 7.15ms {"query":"!git ","query_id":19}
20 4.27ms 250ns 4.27ms {"query":"!git c","query_id":20}
21 5.19ms 292ns 5.19ms {"query":"!git co","query_id":21}
22 4.29ms 417ns 4.29ms {"query":"!git com","query_id":22}
23 4.08ms 125ns 4.08ms {"query":"!git comm","query_id":23}
24 4.50ms 167ns 4.50ms {"query":"!git commi","query_id":24}
25 4.35ms 208ns 4.35ms {"query":"!git commit","query_id":25}
Summary: 25 calls
Wall: avg=3.99ms, min=238.17µs, max=7.15ms, p50=4.29ms, p99=7.15ms
Busy: avg=182ns, min=83ns, max=417ns, p50=167ns, p99=417ns
```
**Nucleo matching time (in daemon)**
```
Individual calls for: nucleo_match
--------------------------------------------------------------------------------------------------------------
# Wall Busy Idle Fields
--------------------------------------------------------------------------------------------------------------
1 1.73ms 125ns 1.73ms {"query":"^","query_id":1}
2 4.57ms 167ns 4.57ms {"query":"^c","query_id":2}
3 4.37ms 125ns 4.37ms {"query":"^ca","query_id":3}
4 2.51ms 84ns 2.51ms {"query":"^car","query_id":4}
5 4.43ms 125ns 4.43ms {"query":"^carg","query_id":5}
6 3.64ms 125ns 3.64ms {"query":"^cargo","query_id":6}
7 2.37ms 84ns 2.37ms {"query":"^cargo ","query_id":7}
8 4.11ms 125ns 4.11ms {"query":"^cargo b","query_id":8}
9 4.36ms 208ns 4.36ms {"query":"^cargo bu","query_id":9}
10 3.85ms 125ns 3.85ms {"query":"^cargo bui","query_id":10}
11 4.35ms 125ns 4.35ms {"query":"^cargo buil","query_id":11}
12 3.94ms 250ns 3.94ms {"query":"^cargo build","query_id":12}
13 4.59ms 125ns 4.59ms {"query":"^cargo build$","query_id":13}
14 4.18ms 84ns 4.18ms {"query":"^cargo ","query_id":14}
15 220.13µs 125ns 220.00µs {"query":"!","query_id":15}
16 4.43ms 125ns 4.43ms {"query":"!g","query_id":16}
17 3.45ms 125ns 3.45ms {"query":"!gi","query_id":17}
18 4.55ms 125ns 4.55ms {"query":"!git","query_id":18}
19 7.12ms 209ns 7.12ms {"query":"!git ","query_id":19}
20 4.25ms 166ns 4.25ms {"query":"!git c","query_id":20}
21 5.18ms 125ns 5.18ms {"query":"!git co","query_id":21}
22 4.27ms 125ns 4.27ms {"query":"!git com","query_id":22}
23 4.06ms 292ns 4.06ms {"query":"!git comm","query_id":23}
24 4.46ms 166ns 4.46ms {"query":"!git commi","query_id":24}
25 4.31ms 208ns 4.31ms {"query":"!git commit","query_id":25}
Summary: 25 calls
Wall: avg=3.97ms, min=220.13µs, max=7.12ms, p50=4.27ms, p99=7.12ms
Busy: avg=147ns, min=84ns, max=292ns, p50=125ns, p99=292ns
```
421 lines
12 KiB
TypeScript
Executable File
#!/usr/bin/env bun
/**
 * Analyze span timing JSON logs generated with ATUIN_SPAN
 *
 * Usage: bun scripts/span-table.ts <file.json> [options]
 *   --filter <pattern>  Only show spans matching pattern (regex)
 *   --sort <field>      Sort by: calls, avg, total, p99 (default: total)
 *   --top <n>           Show top N spans (default: 20)
 *   --detail <span>     Show individual calls for a specific span
 *   --all               Include internal/library spans
 */

import { readFileSync } from "fs";

interface SpanEvent {
|
|
timestamp: string;
|
|
level: string;
|
|
fields: {
|
|
message: string;
|
|
"time.busy"?: string;
|
|
"time.idle"?: string;
|
|
};
|
|
target: string;
|
|
span?: {
|
|
name: string;
|
|
[key: string]: unknown;
|
|
};
|
|
spans?: Array<{ name: string; [key: string]: unknown }>;
|
|
}
|
|
|
|
interface SpanStats {
|
|
name: string;
|
|
calls: number;
|
|
busyTimes: number[]; // in microseconds
|
|
idleTimes: number[];
|
|
parentCounts: Map<string, number>; // parent span name -> count
|
|
}
|
|
|
|
// Parse duration strings like "1.23ms", "456µs", "789ns" to microseconds
|
|
function parseDuration(duration: string): number {
|
|
const match = duration.match(/^([\d.]+)(ns|µs|us|ms|s)$/);
|
|
if (!match) return 0;
|
|
|
|
const value = parseFloat(match[1]);
|
|
const unit = match[2];
|
|
|
|
switch (unit) {
|
|
case "ns":
|
|
return value / 1000;
|
|
case "µs":
|
|
case "us":
|
|
return value;
|
|
case "ms":
|
|
return value * 1000;
|
|
case "s":
|
|
return value * 1_000_000;
|
|
default:
|
|
return 0;
|
|
}
|
|
}
|
|
|
|
// Format microseconds for display
|
|
function formatDuration(us: number): string {
|
|
if (us < 1) {
|
|
return `${(us * 1000).toFixed(0)}ns`;
|
|
} else if (us < 1000) {
|
|
return `${us.toFixed(2)}µs`;
|
|
} else if (us < 1_000_000) {
|
|
return `${(us / 1000).toFixed(2)}ms`;
|
|
} else {
|
|
return `${(us / 1_000_000).toFixed(2)}s`;
|
|
}
|
|
}
|
|
|
|
function percentile(arr: number[], p: number): number {
|
|
if (arr.length === 0) return 0;
|
|
const sorted = [...arr].sort((a, b) => a - b);
|
|
const idx = Math.floor(sorted.length * p);
|
|
return sorted[Math.min(idx, sorted.length - 1)];
|
|
}
|
|
|
|
function parseJsonLines(content: string): SpanEvent[] {
|
|
const events: SpanEvent[] = [];
|
|
for (const line of content.trim().split("\n")) {
|
|
if (!line.trim()) continue;
|
|
try {
|
|
events.push(JSON.parse(line));
|
|
} catch {
|
|
// Skip malformed lines
|
|
}
|
|
}
|
|
return events;
|
|
}
|
|
|
|
function main() {
|
|
const args = process.argv.slice(2);
|
|
|
|
// Parse arguments
|
|
let filterPattern: RegExp | null = null;
|
|
let sortField = "total";
|
|
let topN = 20;
|
|
let detailSpan: string | null = null;
|
|
let showAll = false;
|
|
const files: string[] = [];
|
|
|
|
for (let i = 0; i < args.length; i++) {
|
|
if (args[i] === "--filter" && args[i + 1]) {
|
|
filterPattern = new RegExp(args[++i]);
|
|
} else if (args[i] === "--sort" && args[i + 1]) {
|
|
sortField = args[++i];
|
|
} else if (args[i] === "--top" && args[i + 1]) {
|
|
topN = parseInt(args[++i], 10);
|
|
} else if (args[i] === "--detail" && args[i + 1]) {
|
|
detailSpan = args[++i];
|
|
} else if (args[i] === "--all") {
|
|
showAll = true;
|
|
} else if (!args[i].startsWith("-")) {
|
|
files.push(args[i]);
|
|
}
|
|
}
|
|
|
|
if (files.length === 0) {
|
|
console.error("Usage: bun scripts/span-table.ts <file.json> [options]");
|
|
console.error(" --filter <pattern> Only show spans matching pattern (regex)");
|
|
console.error(" --sort <field> Sort by: calls, avg, total, p99 (default: total)");
|
|
console.error(" --top <n> Show top N spans (default: 20)");
|
|
console.error(" --detail <span> Show individual calls for a specific span");
|
|
console.error(" --all Include internal/library spans");
|
|
process.exit(1);
|
|
}
|
|
|
|
// Parse all files
|
|
const allEvents: SpanEvent[] = [];
|
|
for (const file of files) {
|
|
const content = readFileSync(file, "utf-8");
|
|
for (const event of parseJsonLines(content)) {
|
|
allEvents.push(event);
|
|
}
|
|
}
|
|
|
|
// Filter to close events and aggregate by span name
|
|
const spans = new Map<string, SpanStats>();
|
|
|
|
for (const event of allEvents) {
|
|
if (event.fields?.message !== "close") continue;
|
|
if (!event.span?.name) continue;
|
|
if (!event.fields["time.busy"]) continue;
|
|
|
|
const name = event.span.name;
|
|
|
|
// Apply filter if specified
|
|
if (filterPattern && !filterPattern.test(name)) continue;
|
|
|
|
// Skip noisy internal spans unless explicitly requested
|
|
if (
|
|
!showAll &&
|
|
!filterPattern &&
|
|
!detailSpan &&
|
|
(name.startsWith("FramedRead::") ||
|
|
name.startsWith("FramedWrite::") ||
|
|
name.startsWith("Prioritize::") ||
|
|
name === "poll" ||
|
|
name === "poll_ready" ||
|
|
name === "Connection" ||
|
|
name.startsWith("assign_") ||
|
|
name.startsWith("reserve_") ||
|
|
name.startsWith("try_") ||
|
|
name.startsWith("send_") ||
|
|
name.startsWith("pop_"))
|
|
) {
|
|
continue;
|
|
}
|
|
|
|
if (!spans.has(name)) {
|
|
spans.set(name, { name, calls: 0, busyTimes: [], idleTimes: [], parentCounts: new Map() });
|
|
}
|
|
|
|
const stats = spans.get(name)!;
|
|
stats.calls++;
|
|
stats.busyTimes.push(parseDuration(event.fields["time.busy"]));
|
|
if (event.fields["time.idle"]) {
|
|
stats.idleTimes.push(parseDuration(event.fields["time.idle"]));
|
|
}
|
|
|
|
// Track parent relationship (immediate parent is the last element in spans array)
|
|
const parents = event.spans || [];
|
|
const parentName = parents.length > 0 ? parents[parents.length - 1].name : "__root__";
|
|
stats.parentCounts.set(parentName, (stats.parentCounts.get(parentName) || 0) + 1);
|
|
}
|
|
|
|
if (spans.size === 0) {
|
|
console.error("No matching span close events found");
|
|
process.exit(1);
|
|
}
|
|
|
|
// Detail mode: show individual calls for a specific span
|
|
if (detailSpan) {
|
|
const detailEvents: Array<{
|
|
timestamp: string;
|
|
busy: number;
|
|
idle: number;
|
|
fields: Record<string, unknown>;
|
|
parents: string[];
|
|
}> = [];
|
|
|
|
for (const event of allEvents) {
|
|
if (event.fields?.message !== "close") continue;
|
|
if (event.span?.name !== detailSpan) continue;
|
|
if (!event.fields["time.busy"]) continue;
|
|
|
|
// Extract span fields (excluding name)
|
|
const fields: Record<string, unknown> = {};
|
|
if (event.span) {
|
|
for (const [k, v] of Object.entries(event.span)) {
|
|
if (k !== "name") fields[k] = v;
|
|
}
|
|
}
|
|
|
|
// Get parent span names
|
|
const parents = (event.spans || []).map((s) => s.name);
|
|
|
|
detailEvents.push({
|
|
timestamp: event.timestamp,
|
|
busy: parseDuration(event.fields["time.busy"]),
|
|
idle: event.fields["time.idle"] ? parseDuration(event.fields["time.idle"]) : 0,
|
|
fields,
|
|
parents,
|
|
});
|
|
}
|
|
|
|
if (detailEvents.length === 0) {
|
|
console.error(`No events found for span "${detailSpan}"`);
|
|
process.exit(1);
|
|
}
|
|
|
|
console.log("");
|
|
console.log(`Individual calls for: ${detailSpan}`);
|
|
console.log("-".repeat(110));
|
|
console.log(
|
|
"#".padStart(4) +
|
|
"Wall".padStart(12) +
|
|
"Busy".padStart(12) +
|
|
"Idle".padStart(12) +
|
|
" Fields"
|
|
);
|
|
console.log("-".repeat(110));
|
|
|
|
detailEvents.forEach((e, i) => {
|
|
const fieldsStr = Object.keys(e.fields).length > 0
|
|
? JSON.stringify(e.fields)
|
|
: "";
|
|
|
|
console.log(
|
|
(i + 1).toString().padStart(4) +
|
|
formatDuration(e.busy + e.idle).padStart(12) +
|
|
formatDuration(e.busy).padStart(12) +
|
|
formatDuration(e.idle).padStart(12) +
|
|
" " +
|
|
fieldsStr
|
|
);
|
|
});
|
|
|
|
// Summary stats
|
|
const busyTimes = detailEvents.map((e) => e.busy);
|
|
const wallTimes = detailEvents.map((e) => e.busy + e.idle);
|
|
console.log("");
|
|
console.log(
|
|
`Summary: ${detailEvents.length} calls\n` +
|
|
` Wall: avg=${formatDuration(wallTimes.reduce((a, b) => a + b, 0) / wallTimes.length)}, ` +
|
|
`min=${formatDuration(Math.min(...wallTimes))}, ` +
|
|
`max=${formatDuration(Math.max(...wallTimes))}, ` +
|
|
`p50=${formatDuration(percentile(wallTimes, 0.5))}, ` +
|
|
`p99=${formatDuration(percentile(wallTimes, 0.99))}\n` +
|
|
` Busy: avg=${formatDuration(busyTimes.reduce((a, b) => a + b, 0) / busyTimes.length)}, ` +
|
|
`min=${formatDuration(Math.min(...busyTimes))}, ` +
|
|
`max=${formatDuration(Math.max(...busyTimes))}, ` +
|
|
`p50=${formatDuration(percentile(busyTimes, 0.5))}, ` +
|
|
`p99=${formatDuration(percentile(busyTimes, 0.99))}`
|
|
);
|
|
return;
|
|
}
|
|
|
|
// Calculate stats
|
|
const results = [...spans.values()].map((s) => {
|
|
// Calculate wall times (busy + idle) for each call
|
|
const wallTimes = s.busyTimes.map((busy, i) => busy + (s.idleTimes[i] || 0));
|
|
|
|
// Find most common parent
|
|
let mostCommonParent = "__root__";
|
|
let maxCount = 0;
|
|
for (const [parent, count] of s.parentCounts) {
|
|
if (count > maxCount) {
|
|
maxCount = count;
|
|
mostCommonParent = parent;
|
|
}
|
|
}
|
|
|
|
return {
|
|
name: s.name,
|
|
calls: s.calls,
|
|
total: s.busyTimes.reduce((a, b) => a + b, 0),
|
|
avg: s.busyTimes.reduce((a, b) => a + b, 0) / s.calls,
|
|
min: Math.min(...s.busyTimes),
|
|
max: Math.max(...s.busyTimes),
|
|
p50: percentile(s.busyTimes, 0.5),
|
|
p99: percentile(s.busyTimes, 0.99),
|
|
avgWall: wallTimes.reduce((a, b) => a + b, 0) / s.calls,
|
|
p50Wall: percentile(wallTimes, 0.5),
|
|
p99Wall: percentile(wallTimes, 0.99),
|
|
parent: mostCommonParent,
|
|
};
|
|
});
|
|
|
|
// Build tree structure
|
|
const childrenOf = new Map<string, string[]>();
|
|
childrenOf.set("__root__", []);
|
|
for (const r of results) {
|
|
if (!childrenOf.has(r.name)) {
|
|
childrenOf.set(r.name, []);
|
|
}
|
|
if (!childrenOf.has(r.parent)) {
|
|
childrenOf.set(r.parent, []);
|
|
}
|
|
childrenOf.get(r.parent)!.push(r.name);
|
|
}
|
|
|
|
// Sort children by the specified field
|
|
const resultMap = new Map(results.map(r => [r.name, r]));
|
|
const sortChildren = (children: string[]) => {
|
|
children.sort((a, b) => {
|
|
const ra = resultMap.get(a);
|
|
const rb = resultMap.get(b);
|
|
if (!ra || !rb) return 0;
|
|
switch (sortField) {
|
|
case "calls":
|
|
return rb.calls - ra.calls;
|
|
case "avg":
|
|
return rb.avg - ra.avg;
|
|
case "p99":
|
|
return rb.p99 - ra.p99;
|
|
case "total":
|
|
default:
|
|
return rb.total - ra.total;
|
|
}
|
|
});
|
|
};
|
|
|
|
// Traverse tree to build ordered display list with depths
|
|
const displayResults: Array<{ result: typeof results[0]; depth: number }> = [];
|
|
const visited = new Set<string>();
|
|
|
|
function traverse(name: string, depth: number) {
|
|
if (visited.has(name)) return;
|
|
visited.add(name);
|
|
|
|
const result = resultMap.get(name);
|
|
if (result) {
|
|
displayResults.push({ result, depth });
|
|
}
|
|
|
|
const children = childrenOf.get(name) || [];
|
|
sortChildren(children);
|
|
for (const child of children) {
|
|
traverse(child, depth + 1);
|
|
}
|
|
}
|
|
|
|
// Start from roots
|
|
const roots = childrenOf.get("__root__") || [];
|
|
sortChildren(roots);
|
|
for (const root of roots) {
|
|
traverse(root, 0);
|
|
}
|
|
|
|
// Add any orphaned spans (whose parent wasn't in our span list)
|
|
for (const r of results) {
|
|
if (!visited.has(r.name)) {
|
|
displayResults.push({ result: r, depth: 0 });
|
|
}
|
|
}
|
|
|
|
// Apply topN limit
|
|
const limitedResults = displayResults.slice(0, topN);
|
|
|
|
console.log("");
|
|
console.log(
|
|
"Span Name".padEnd(40) +
|
|
"Calls".padStart(6) +
|
|
"Avg(wall)".padStart(11) +
|
|
"P50(wall)".padStart(11) +
|
|
"P99(wall)".padStart(11) +
|
|
"Avg(busy)".padStart(11) +
|
|
"P50(busy)".padStart(11) +
|
|
"P99(busy)".padStart(11)
|
|
);
|
|
console.log("-".repeat(112));
|
|
|
|
for (const { result: r, depth } of limitedResults) {
|
|
const indent = " ".repeat(depth);
|
|
const maxNameLen = 38 - indent.length;
|
|
const truncatedName = r.name.length > maxNameLen ? "..." + r.name.slice(-(maxNameLen - 3)) : r.name;
|
|
const displayName = indent + truncatedName;
|
|
|
|
console.log(
|
|
displayName.padEnd(40) +
|
|
r.calls.toString().padStart(6) +
|
|
formatDuration(r.avgWall).padStart(11) +
|
|
formatDuration(r.p50Wall).padStart(11) +
|
|
formatDuration(r.p99Wall).padStart(11) +
|
|
formatDuration(r.avg).padStart(11) +
|
|
formatDuration(r.p50).padStart(11) +
|
|
formatDuration(r.p99).padStart(11)
|
|
);
|
|
}
|
|
|
|
console.log("");
|
|
console.log(`Showing ${limitedResults.length} of ${results.length} spans (sorted by ${sortField})`);
|
|
}
|
|
|
|
main();
|