diff --git a/README.md b/README.md index 903a3e4..98a0ea1 100644 --- a/README.md +++ b/README.md @@ -84,6 +84,18 @@ notte auth logout # Remove API key from keychain notte auth status # Show authentication status ``` +### Web Search + +```bash +notte search <query> # Search the web for a query +notte search <query> --depth fast|standard|deep # Tune search depth (default: standard) +notte search <query> --output-type sourcedAnswer # Get an LLM answer with sources +``` + +The query may be quoted (`notte search "what is anthropic"`) or passed as separate +words (`notte search what is anthropic`). Use `--output json` to get the raw API +response for scripting. + ### Browser Sessions ```bash diff --git a/internal/cmd/root.go b/internal/cmd/root.go index f34b081..feb004c 100644 --- a/internal/cmd/root.go +++ b/internal/cmd/root.go @@ -37,6 +37,7 @@ for browser automation, AI agents, and web scraping. Get started: notte auth login # Authenticate with your API key notte sessions start # Start a browser session + notte search # Search the web notte scrape # Quick scrape a webpage`, SilenceUsage: true, SilenceErrors: true, diff --git a/internal/cmd/search.go b/internal/cmd/search.go new file mode 100644 index 0000000..422116a --- /dev/null +++ b/internal/cmd/search.go @@ -0,0 +1,225 @@ +package cmd + +import ( + "encoding/json" + "fmt" + "html" + "os" + "strings" + + "github.com/muesli/termenv" + "github.com/spf13/cobra" + + "github.com/nottelabs/notte-cli/internal/api" +) + +var ( + searchDepth string + searchOutputType string + + validSearchDepths = map[string]bool{"standard": true, "fast": true, "deep": true} + validSearchOutputTypes = map[string]bool{"searchResults": true, "sourcedAnswer": true, "structured": true} + + // termenvOutput is shared so we don't allocate a fresh termenv.Output for + // every colorize call (the color profile / TTY detection doesn't change + // during a single command run). 
+ termenvOutput = termenv.NewOutput(os.Stdout) +) + +var searchCmd = &cobra.Command{ + Use: "search ", + Short: "Search the web", + Long: `Search the web via the Notte search API. + +Returns either a list of search results, or an LLM-generated answer with sources, +depending on --output-type. + +Examples: + notte search "what is anthropic" + notte search what is anthropic + notte search "latest llm releases" --depth deep + notte search "what is anthropic" --output-type sourcedAnswer`, + Args: cobra.MinimumNArgs(1), + RunE: runSearch, +} + +func init() { + rootCmd.AddCommand(searchCmd) + searchCmd.Flags().StringVar(&searchDepth, "depth", "", "Search depth: standard, fast, or deep") + searchCmd.Flags().StringVar(&searchOutputType, "output-type", "", "Output type: searchResults, sourcedAnswer, or structured") +} + +func runSearch(cmd *cobra.Command, args []string) error { + query := strings.TrimSpace(strings.Join(args, " ")) + if query == "" { + return fmt.Errorf("search query cannot be empty") + } + if searchDepth != "" && !validSearchDepths[searchDepth] { + return fmt.Errorf("invalid --depth %q: must be standard, fast, or deep", searchDepth) + } + if searchOutputType != "" && !validSearchOutputTypes[searchOutputType] { + return fmt.Errorf("invalid --output-type %q: must be searchResults, sourcedAnswer, or structured", searchOutputType) + } + + client, err := GetClient() + if err != nil { + return err + } + + ctx, cancel := GetContextWithTimeout(cmd.Context()) + defer cancel() + + body := api.SearchRequest{Q: query} + if searchDepth != "" { + body.Depth = &searchDepth + } + if searchOutputType != "" { + body.OutputType = &searchOutputType + } + + resp, err := client.Client().SearchWebWithResponse(ctx, &api.SearchWebParams{}, body) + if err != nil { + return fmt.Errorf("API request failed: %w", err) + } + + if err := HandleAPIResponse(resp.HTTPResponse, resp.Body); err != nil { + return err + } + + return printSearchResponse(resp.Body, query) +} + +// 
printSearchResponse renders the raw search response body. The /search endpoint +// returns different shapes depending on outputType, so we decode loosely and +// render whichever fields are present. +func printSearchResponse(body []byte, query string) error { + var parsed map[string]any + if err := json.Unmarshal(body, &parsed); err != nil { + // Fall back to printing the raw body if it isn't a JSON object + if IsJSONOutput() { + _, _ = os.Stdout.Write(body) + if len(body) == 0 || body[len(body)-1] != '\n' { + fmt.Println() + } + return nil + } + fmt.Println(string(body)) + return nil + } + + if IsJSONOutput() { + return GetFormatter().Print(parsed) + } + + // sourcedAnswer shape: { "answer": "...", "sources": [...] } + if answer, ok := parsed["answer"].(string); ok { + printAnswer(query, answer) + if sources, ok := parsed["sources"].([]any); ok && len(sources) > 0 { + fmt.Println() + printSources(sources) + } + return nil + } + + // searchResults shape: { "results": [...] } + if results, ok := parsed["results"].([]any); ok { + if len(results) == 0 { + fmt.Printf("No results for %q.\n", query) + return nil + } + printResults(query, results) + return nil + } + + // Unknown shape - fall back to formatter + return GetFormatter().Print(parsed) +} + +func printAnswer(query, answer string) { + header := colorizeText(fmt.Sprintf("Answer for %q:", query), termenv.ANSICyan) + fmt.Println(header) + fmt.Println() + fmt.Println(html.UnescapeString(answer)) +} + +func printSources(sources []any) { + header := colorizeText("Sources:", termenv.ANSICyan) + fmt.Println(header) + for i, raw := range sources { + src, ok := raw.(map[string]any) + if !ok { + continue + } + title := stringField(src, "name", "title") + url := stringField(src, "url") + snippet := stringField(src, "snippet", "content") + + num := fmt.Sprintf("%d.", i+1) + fmt.Printf("%s %s\n", colorizeText(num, termenv.ANSIYellow), html.UnescapeString(title)) + if url != "" { + fmt.Printf(" %s\n", colorizeText(url, 
termenv.ANSIBlue)) + } + if snippet != "" { + fmt.Printf(" %s\n", truncate(snippet, 240)) + } + } +} + +func printResults(query string, results []any) { + header := colorizeText(fmt.Sprintf("Search results for %q (%d):", query, len(results)), termenv.ANSICyan) + fmt.Println(header) + fmt.Println() + for i, raw := range results { + item, ok := raw.(map[string]any) + if !ok { + continue + } + title := stringField(item, "name", "title") + url := stringField(item, "url") + content := stringField(item, "content", "snippet", "description") + + num := fmt.Sprintf("%d.", i+1) + fmt.Printf("%s %s\n", colorizeText(num, termenv.ANSIYellow), html.UnescapeString(title)) + if url != "" { + fmt.Printf(" %s\n", colorizeText(url, termenv.ANSIBlue)) + } + if content != "" { + fmt.Printf(" %s\n", truncate(content, 240)) + } + if i < len(results)-1 { + fmt.Println() + } + } +} + +// stringField returns the first non-empty string value among the given keys. +func stringField(m map[string]any, keys ...string) string { + for _, k := range keys { + if v, ok := m[k].(string); ok && v != "" { + return v + } + } + return "" +} + +func truncate(s string, maxRunes int) string { + // Decode HTML entities (the API returns ', &, etc.) so the text + // reads naturally in the terminal. JSON output stays raw. + s = html.UnescapeString(s) + s = strings.TrimSpace(s) + // Collapse internal whitespace/newlines so multi-line snippets render on one line + s = strings.Join(strings.Fields(s), " ") + r := []rune(s) + if len(r) <= maxRunes { + return s + } + return string(r[:maxRunes]) + "..." +} + +// colorizeText applies a color via the shared termenv output, respecting --no-color. 
+func colorizeText(s string, color termenv.ANSIColor) string { + // --no-color short-circuits: hand the text back unstyled. + if noColor { + return s + } + // Otherwise style the text through the shared termenvOutput. + return termenvOutput.String(s).Foreground(color).String() +} diff --git a/internal/cmd/search_test.go b/internal/cmd/search_test.go new file mode 100644 index 0000000..656b6cb --- /dev/null +++ b/internal/cmd/search_test.go @@ -0,0 +1,460 @@ +package cmd + +import ( + "context" + "encoding/json" + "strings" + "testing" + + "github.com/spf13/cobra" + + "github.com/nottelabs/notte-cli/internal/auth" + "github.com/nottelabs/notte-cli/internal/testutil" +) + +const ( + // searchResultsBody is a searchResults-shaped payload with two entries. + searchResultsBody = `{"results":[` + + `{"name":"Home Anthropic","url":"https://www.anthropic.com/","content":"AI safety company.","favicon":"https://x/y","type":"text"},` + + `{"name":"Anthropic - Wikipedia","url":"https://en.wikipedia.org/wiki/Anthropic","content":"Anthropic is an American AI company.","favicon":"https://x/z","type":"text"}` + + `]}` + + // searchSourcedAnswerBody is a sourcedAnswer-shaped payload: an answer plus two sources. + searchSourcedAnswerBody = `{"answer":"Anthropic is an AI safety company.","sources":[` + + `{"name":"Home Anthropic","url":"https://www.anthropic.com/","snippet":"Anthropic is an AI safety company."},` + + `{"name":"Wikipedia","url":"https://en.wikipedia.org/wiki/Anthropic","snippet":"American AI firm."}` + + `]}` + + // Mirrors the wire format the live API returns: HTML entities like + // ' (apostrophe) and & (ampersand) inside titles and snippets. + // NOTE(review): the two literals below hold plain ' and & characters rather + // than &#39; / &amp; entities - confirm the fixtures were not entity-decoded + // somewhere along the way. + searchEntitiesBody = `{"results":[` + + `{"name":"Anthropic & Claude","url":"https://example.com/a","content":"Anthropic's mission is AI safety."}` + + `]}` + + searchEntitiesAnswerBody = `{"answer":"Anthropic's mission & values.","sources":[` + + `{"name":"Source & co","url":"https://example.com/b","snippet":"It's great."}` + + `]}` + + // searchEmptyResultsBody is a results payload whose list is empty. + searchEmptyResultsBody = `{"results":[]}` +) + +// setupSearchTest configures an isolated env with a mock server, a test API key, +// and disables colors. It restores all globals it touches. 
+func setupSearchTest(t *testing.T) *testutil.MockServer { + t.Helper() + env := testutil.SetupTestEnv(t) + env.SetEnv("NOTTE_API_KEY", "test-key") + + server := testutil.NewMockServer() + t.Cleanup(func() { server.Close() }) + env.SetEnv("NOTTE_API_URL", server.URL()) + + origNoColor := noColor + noColor = true + t.Cleanup(func() { noColor = origNoColor }) + + origDepth := searchDepth + origOutputType := searchOutputType + searchDepth = "" + searchOutputType = "" + t.Cleanup(func() { + searchDepth = origDepth + searchOutputType = origOutputType + }) + + return server +} + +// withFormat temporarily overrides the global outputFormat for a test. +func withFormat(t *testing.T, format string) { + t.Helper() + orig := outputFormat + outputFormat = format + t.Cleanup(func() { outputFormat = orig }) +} + +func TestRunSearch_Results_TextMode(t *testing.T) { + server := setupSearchTest(t) + server.AddResponse("/search", 200, searchResultsBody) + withFormat(t, "text") + + cmd := &cobra.Command{} + cmd.SetContext(context.Background()) + + stdout, _ := testutil.CaptureOutput(func() { + if err := runSearch(cmd, []string{"what", "is", "anthropic"}); err != nil { + t.Fatalf("unexpected error: %v", err) + } + }) + + if !strings.Contains(stdout, "Search results for") { + t.Errorf("missing results header, got:\n%s", stdout) + } + if !strings.Contains(stdout, `"what is anthropic"`) { + t.Errorf("expected query echoed back, got:\n%s", stdout) + } + if !strings.Contains(stdout, "Home Anthropic") { + t.Errorf("expected first result title, got:\n%s", stdout) + } + if !strings.Contains(stdout, "https://www.anthropic.com/") { + t.Errorf("expected first result URL, got:\n%s", stdout) + } + if !strings.Contains(stdout, "AI safety company.") { + t.Errorf("expected first result snippet, got:\n%s", stdout) + } +} + +func TestRunSearch_Results_EmptyList(t *testing.T) { + server := setupSearchTest(t) + server.AddResponse("/search", 200, searchEmptyResultsBody) + withFormat(t, "text") + + cmd := 
&cobra.Command{} + cmd.SetContext(context.Background()) + + stdout, _ := testutil.CaptureOutput(func() { + if err := runSearch(cmd, []string{"obscure-query"}); err != nil { + t.Fatalf("unexpected error: %v", err) + } + }) + + if !strings.Contains(stdout, "No results") { + t.Errorf("expected empty-results message, got:\n%s", stdout) + } +} + +func TestRunSearch_SourcedAnswer_TextMode(t *testing.T) { + server := setupSearchTest(t) + server.AddResponse("/search", 200, searchSourcedAnswerBody) + withFormat(t, "text") + + searchOutputType = "sourcedAnswer" + + cmd := &cobra.Command{} + cmd.SetContext(context.Background()) + + stdout, _ := testutil.CaptureOutput(func() { + if err := runSearch(cmd, []string{"what is anthropic"}); err != nil { + t.Fatalf("unexpected error: %v", err) + } + }) + + if !strings.Contains(stdout, "Answer for") { + t.Errorf("expected answer header, got:\n%s", stdout) + } + if !strings.Contains(stdout, "Anthropic is an AI safety company.") { + t.Errorf("expected answer body, got:\n%s", stdout) + } + if !strings.Contains(stdout, "Sources:") { + t.Errorf("expected sources header, got:\n%s", stdout) + } + if !strings.Contains(stdout, "https://en.wikipedia.org/wiki/Anthropic") { + t.Errorf("expected source URL, got:\n%s", stdout) + } +} + +func TestRunSearch_JSONMode(t *testing.T) { + server := setupSearchTest(t) + server.AddResponse("/search", 200, searchResultsBody) + withFormat(t, "json") + + cmd := &cobra.Command{} + cmd.SetContext(context.Background()) + + stdout, _ := testutil.CaptureOutput(func() { + if err := runSearch(cmd, []string{"anthropic"}); err != nil { + t.Fatalf("unexpected error: %v", err) + } + }) + + var parsed map[string]any + if err := json.Unmarshal([]byte(stdout), &parsed); err != nil { + t.Fatalf("expected JSON output, got %q: %v", stdout, err) + } + results, ok := parsed["results"].([]any) + if !ok { + t.Fatalf("expected results array in JSON output, got %v", parsed) + } + if len(results) != 2 { + t.Errorf("expected 2 
results, got %d", len(results)) + } +} + +func TestRunSearch_APIError(t *testing.T) { + server := setupSearchTest(t) + server.AddResponse("/search", 422, `{"detail":"validation error"}`) + withFormat(t, "text") + + cmd := &cobra.Command{} + cmd.SetContext(context.Background()) + + err := runSearch(cmd, []string{"oops"}) + if err == nil { + t.Fatal("expected error for non-2xx response, got nil") + } +} + +func TestRunSearch_NoAPIKey(t *testing.T) { + env := testutil.SetupTestEnv(t) + auth.SetKeyring(env.MockStore) + t.Cleanup(auth.ResetKeyring) + + cmd := &cobra.Command{} + cmd.SetContext(context.Background()) + + if err := runSearch(cmd, []string{"anthropic"}); err == nil { + t.Error("expected error when no API key is configured") + } +} + +func TestRunSearch_EmptyQueryAfterTrim(t *testing.T) { + setupSearchTest(t) + withFormat(t, "text") + + cmd := &cobra.Command{} + cmd.SetContext(context.Background()) + + err := runSearch(cmd, []string{" "}) + if err == nil { + t.Fatal("expected error for whitespace-only query") + } + if !strings.Contains(err.Error(), "empty") { + t.Errorf("expected 'empty' in error, got: %v", err) + } +} + +func TestRunSearch_RejectsInvalidDepth(t *testing.T) { + setupSearchTest(t) + withFormat(t, "text") + searchDepth = "turbo" + + cmd := &cobra.Command{} + cmd.SetContext(context.Background()) + + err := runSearch(cmd, []string{"anthropic"}) + if err == nil { + t.Fatal("expected error for invalid depth, got nil") + } + if !strings.Contains(err.Error(), "invalid --depth") { + t.Errorf("expected 'invalid --depth' in error, got: %v", err) + } +} + +func TestRunSearch_RejectsInvalidOutputType(t *testing.T) { + setupSearchTest(t) + withFormat(t, "text") + searchOutputType = "bogus" + + cmd := &cobra.Command{} + cmd.SetContext(context.Background()) + + err := runSearch(cmd, []string{"anthropic"}) + if err == nil { + t.Fatal("expected error for invalid output type, got nil") + } + if !strings.Contains(err.Error(), "invalid --output-type") { + 
t.Errorf("expected 'invalid --output-type' in error, got: %v", err) + } +} + +func TestRunSearch_AcceptsAllValidDepths(t *testing.T) { + for _, depth := range []string{"standard", "fast", "deep"} { + t.Run(depth, func(t *testing.T) { + server := setupSearchTest(t) + server.AddResponse("/search", 200, searchResultsBody) + withFormat(t, "json") + searchDepth = depth + + cmd := &cobra.Command{} + cmd.SetContext(context.Background()) + + _, _ = testutil.CaptureOutput(func() { + if err := runSearch(cmd, []string{"anthropic"}); err != nil { + t.Errorf("depth=%q should be accepted, got error: %v", depth, err) + } + }) + }) + } +} + +func TestRunSearch_AcceptsAllValidOutputTypes(t *testing.T) { + for _, ot := range []string{"searchResults", "sourcedAnswer", "structured"} { + t.Run(ot, func(t *testing.T) { + server := setupSearchTest(t) + server.AddResponse("/search", 200, searchResultsBody) + withFormat(t, "json") + searchOutputType = ot + + cmd := &cobra.Command{} + cmd.SetContext(context.Background()) + + _, _ = testutil.CaptureOutput(func() { + if err := runSearch(cmd, []string{"anthropic"}); err != nil { + t.Errorf("output-type=%q should be accepted, got error: %v", ot, err) + } + }) + }) + } +} + +func TestSearchCmd_RequiresArgs(t *testing.T) { + if err := searchCmd.Args(searchCmd, nil); err == nil { + t.Error("expected error when called with no args") + } + if err := searchCmd.Args(searchCmd, []string{"query"}); err != nil { + t.Errorf("expected no error for valid args, got: %v", err) + } +} + +func TestPrintSearchResponse_FallsBackForUnknownShape(t *testing.T) { + origFormat := outputFormat + outputFormat = "text" + t.Cleanup(func() { outputFormat = origFormat }) + + body := []byte(`{"unexpected":"payload","other":42}`) + stdout, _ := testutil.CaptureOutput(func() { + if err := printSearchResponse(body, "q"); err != nil { + t.Fatalf("unexpected error: %v", err) + } + }) + if !strings.Contains(stdout, "unexpected") || !strings.Contains(stdout, "payload") { + 
t.Errorf("expected formatter fallback to print fields, got:\n%s", stdout) + } +} + +func TestPrintSearchResponse_NonJSONBody(t *testing.T) { + origFormat := outputFormat + outputFormat = "text" + t.Cleanup(func() { outputFormat = origFormat }) + + body := []byte("not json at all") + stdout, _ := testutil.CaptureOutput(func() { + if err := printSearchResponse(body, "q"); err != nil { + t.Fatalf("unexpected error: %v", err) + } + }) + if !strings.Contains(stdout, "not json at all") { + t.Errorf("expected raw body in output, got:\n%s", stdout) + } +} + +func TestTruncate(t *testing.T) { + tests := []struct { + name string + in string + max int + want string + }{ + {"short stays", "hello", 10, "hello"}, + {"trims whitespace", " hello ", 10, "hello"}, + {"collapses newlines", "hello\n\nworld", 20, "hello world"}, + {"truncates with ellipsis", "abcdefghij", 5, "abcde..."}, + {"decodes html entities", "Anthropic's & Claude", 50, "Anthropic's & Claude"}, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + got := truncate(tt.in, tt.max) + if got != tt.want { + t.Errorf("truncate(%q, %d) = %q, want %q", tt.in, tt.max, got, tt.want) + } + }) + } +} + +func TestRunSearch_DecodesHTMLEntitiesInTextMode(t *testing.T) { + server := setupSearchTest(t) + server.AddResponse("/search", 200, searchEntitiesBody) + withFormat(t, "text") + + cmd := &cobra.Command{} + cmd.SetContext(context.Background()) + + stdout, _ := testutil.CaptureOutput(func() { + if err := runSearch(cmd, []string{"anthropic"}); err != nil { + t.Fatalf("unexpected error: %v", err) + } + }) + + if strings.Contains(stdout, "'") || strings.Contains(stdout, "&") { + t.Errorf("expected HTML entities to be decoded in text output, got:\n%s", stdout) + } + if !strings.Contains(stdout, "Anthropic & Claude") { + t.Errorf("expected decoded title 'Anthropic & Claude', got:\n%s", stdout) + } + if !strings.Contains(stdout, "Anthropic's mission is AI safety.") { + t.Errorf("expected decoded snippet, got:\n%s", 
stdout) + } +} + +func TestRunSearch_DecodesHTMLEntitiesInSourcedAnswer(t *testing.T) { + server := setupSearchTest(t) + server.AddResponse("/search", 200, searchEntitiesAnswerBody) + withFormat(t, "text") + searchOutputType = "sourcedAnswer" + + cmd := &cobra.Command{} + cmd.SetContext(context.Background()) + + stdout, _ := testutil.CaptureOutput(func() { + if err := runSearch(cmd, []string{"anthropic"}); err != nil { + t.Fatalf("unexpected error: %v", err) + } + }) + + if strings.Contains(stdout, "'") || strings.Contains(stdout, "&") { + t.Errorf("expected HTML entities to be decoded in answer/sources, got:\n%s", stdout) + } + if !strings.Contains(stdout, "Anthropic's mission & values.") { + t.Errorf("expected decoded answer, got:\n%s", stdout) + } + if !strings.Contains(stdout, "Source & co") { + t.Errorf("expected decoded source title, got:\n%s", stdout) + } +} + +func TestRunSearch_PreservesEntitiesInJSONMode(t *testing.T) { + server := setupSearchTest(t) + server.AddResponse("/search", 200, searchEntitiesBody) + withFormat(t, "json") + + cmd := &cobra.Command{} + cmd.SetContext(context.Background()) + + stdout, _ := testutil.CaptureOutput(func() { + if err := runSearch(cmd, []string{"anthropic"}); err != nil { + t.Fatalf("unexpected error: %v", err) + } + }) + + var parsed map[string]any + if err := json.Unmarshal([]byte(stdout), &parsed); err != nil { + t.Fatalf("expected JSON output, got %q: %v", stdout, err) + } + results, _ := parsed["results"].([]any) + if len(results) != 1 { + t.Fatalf("expected 1 result, got %d", len(results)) + } + first, _ := results[0].(map[string]any) + if name, _ := first["name"].(string); name != "Anthropic & Claude" { + t.Errorf("JSON mode should preserve raw API entities, got %q", name) + } + if content, _ := first["content"].(string); content != "Anthropic's mission is AI safety." 
{ + t.Errorf("JSON mode should preserve raw API entities, got %q", content) + } +} + +func TestStringField(t *testing.T) { + m := map[string]any{ + "title": "", + "name": "Hello", + "url": "https://example.com", + "missing": 42, // wrong type, should be ignored + } + if got := stringField(m, "title", "name"); got != "Hello" { + t.Errorf("expected fallback to 'name', got %q", got) + } + if got := stringField(m, "url"); got != "https://example.com" { + t.Errorf("expected url, got %q", got) + } + if got := stringField(m, "missing", "absent"); got != "" { + t.Errorf("expected empty for non-string/missing keys, got %q", got) + } +}