From 3ed90844cb5bfd3c3a849172f0168b1cfdb3ed14 Mon Sep 17 00:00:00 2001 From: luajscss <70727706+luajscss@users.noreply.github.com> Date: Tue, 7 Apr 2026 05:24:18 +0300 Subject: [PATCH] feat: add Deep Reverse mode using DeepWiki + LLM multi-phase pipeline MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Deep Reverse produces detailed, comprehensive prompts (2000-4000 words) by combining DeepWiki documentation analysis with LLM synthesis. Pipeline: wiki structure → LLM selects key topics → fetch full wiki → extract selected sections → LLM synthesizes actionable prompt. Includes: SSE streaming progress, combined browser + server-side fingerprint rate limiting (3/week), gradual rollout toggle, Supabase caching, in-flight dedup, and shared LLM client refactor. --- .env.example | 26 ++ .gitignore | 1 + README.md | 22 ++ app/api/deep-reverse/route.ts | 413 +++++++++++++++++++++++++++++ app/api/library/route.ts | 39 ++- app/api/reverse-prompt/route.ts | 84 +----- components/reverse-prompt-home.tsx | 300 ++++++++++++++++++++- lib/deep-reverse-prompts.ts | 107 ++++++++ lib/deepwiki-client.ts | 172 ++++++++++++ lib/fingerprint-server.ts | 23 ++ lib/fingerprint.ts | 24 ++ lib/llm-client.ts | 130 +++++++++ 12 files changed, 1254 insertions(+), 87 deletions(-) create mode 100644 .env.example create mode 100644 app/api/deep-reverse/route.ts create mode 100644 lib/deep-reverse-prompts.ts create mode 100644 lib/deepwiki-client.ts create mode 100644 lib/fingerprint-server.ts create mode 100644 lib/fingerprint.ts create mode 100644 lib/llm-client.ts diff --git a/.env.example b/.env.example new file mode 100644 index 0000000..43ed83f --- /dev/null +++ b/.env.example @@ -0,0 +1,26 @@ +# LLM Provider (at least one required) +OPENROUTER_API_KEY= +OPENROUTER_MODEL=google/gemini-2.5-pro +# or +GOOGLE_GENERATIVE_AI_API_KEY= +GOOGLE_AI_STUDIO_MODEL=gemini-2.5-pro + +# GitHub (optional — improves rate limits) +GITHUB_TOKEN= + +# OpenRouter metadata 
(optional) +OPENROUTER_HTTP_REFERER= +OPENROUTER_APP_TITLE= + +# Supabase (optional — enables caching + library) +SUPABASE_URL= +SUPABASE_PUBLISHABLE_KEY= + +# Cache TTL in hours (default: 24) +CACHE_TTL_HOURS=24 + +# Deep Reverse (optional — requires Supabase) +DEEP_REVERSE_ROLLOUT_PERCENT=50 +DEEP_REVERSE_MAX_USES_PER_WEEK=3 +DEEP_CACHE_TTL_HOURS=168 +DEEPWIKI_MCP_URL=https://mcp.deepwiki.com/mcp diff --git a/.gitignore b/.gitignore index 5ef6a52..7b8da95 100644 --- a/.gitignore +++ b/.gitignore @@ -32,6 +32,7 @@ yarn-error.log* # env files (can opt-in for committing if needed) .env* +!.env.example # vercel .vercel diff --git a/README.md b/README.md index ab3c7cf..0ee43db 100644 --- a/README.md +++ b/README.md @@ -12,10 +12,32 @@ Paste a GitHub URL or `owner/repo` on the home page. You can also open **`/owner Next.js (App Router), React, TypeScript, Tailwind CSS, GitHub API, OpenRouter. +## Deep Reverse + +**Deep Reverse** is an optional mode that produces a detailed, comprehensive prompt (2000–4000 words) by combining [DeepWiki](https://deepwiki.com/) documentation analysis with LLM synthesis. The pipeline: + +1. Fetches the repository's wiki structure from DeepWiki (MCP API) +2. An LLM selects 1–5 key documentation sections to examine +3. Fetches full wiki documentation and extracts the selected sections +4. An LLM synthesizes everything into one actionable prompt + +Deep Reverse is **rate-limited** (3 uses per week per browser fingerprint) and **gradually rolled out** via `DEEP_REVERSE_ROLLOUT_PERCENT`. It requires Supabase for caching and rate tracking. Results are cached separately in `deep_prompt_cache`. + +If a repository has not been indexed by DeepWiki, the request is declined and the usage attempt is refunded. + ## Configuration Copy `.env.example` to `.env.local`. You need **`OPENROUTER_API_KEY`**. 
Optional: `OPENROUTER_MODEL` (defaults to `google/gemini-2.5-pro`), `GITHUB_TOKEN` for better GitHub rate limits, and Supabase env vars from the example file if you want server-side caching. +### Deep Reverse env vars (optional) + +| Variable | Default | Description | +|---|---|---| +| `DEEP_REVERSE_ROLLOUT_PERCENT` | `50` | Percentage of browser fingerprints that see the Deep Reverse toggle (0–100) | +| `DEEP_REVERSE_MAX_USES_PER_WEEK` | `3` | Deep reverse uses per fingerprint per week | +| `DEEP_CACHE_TTL_HOURS` | `168` | How long deep prompts are cached (default 7 days) | +| `DEEPWIKI_MCP_URL` | `https://mcp.deepwiki.com/mcp` | DeepWiki MCP endpoint override | + ## Development ```bash diff --git a/app/api/deep-reverse/route.ts b/app/api/deep-reverse/route.ts new file mode 100644 index 0000000..eb90d50 --- /dev/null +++ b/app/api/deep-reverse/route.ts @@ -0,0 +1,413 @@ +/** + * Deep Reverse API — multi-phase pipeline using DeepWiki + LLM. + * + * GET → eligibility check + remaining uses (fingerprint derived from headers) + * POST { repoUrl } → SSE stream with progress events + */ + +import { NextRequest, NextResponse } from "next/server"; +import { readWikiStructure, readWikiContents, extractPages } from "@/lib/deepwiki-client"; +import { deriveFingerprint } from "@/lib/fingerprint-server"; +import { getRepoMeta } from "@/lib/github-client"; +import { type LlmTarget, resolveLlmTarget, callLlm } from "@/lib/llm-client"; +import { parseGitHubRepoInput } from "@/lib/parse-github-repo"; +import { getSupabase } from "@/lib/supabase"; +import { + DEEP_ANALYSIS_PROMPT, + DEEP_SYNTHESIS_PROMPT, + buildAnalysisUserMessage, + buildSynthesisUserMessage, +} from "@/lib/deep-reverse-prompts"; + +const MAX_USES_PER_WEEK = Number(process.env.DEEP_REVERSE_MAX_USES_PER_WEEK) || 3; +const DEEP_MAX_TOKENS = 14_000; +const ANALYSIS_MAX_TOKENS = 1_000; +const CACHE_TTL_HOURS = Number(process.env.DEEP_CACHE_TTL_HOURS) || 168; +const ROLLOUT_PERCENT = Number( + 
process.env.DEEP_REVERSE_ROLLOUT_PERCENT ?? "50" +); + +/* Per-instance dedup — prevents duplicate processing within a single serverless instance. + Does not deduplicate across multiple Vercel instances. */ +const inFlight = new Set(); + +/** Deterministic rollout gate based on the last 2 hex chars of fingerprint. */ +function isEligible(fingerprint: string): boolean { + const tail = parseInt(fingerprint.slice(-2), 16); + return tail < (ROLLOUT_PERCENT / 100) * 256; +} + +/** Parse the LLM's JSON array of section paths, tolerating markdown fences. */ +function parseSectionPaths(raw: string): string[] { + let cleaned = raw.trim(); + if (cleaned.startsWith("```")) { + cleaned = cleaned + .replace(/^```(?:json)?\s*/, "") + .replace(/\s*```$/, ""); + } + const arr: unknown = JSON.parse(cleaned); + if (!Array.isArray(arr)) return []; + return arr + .filter((s): s is string => typeof s === "string" && s.trim().length > 0) + .slice(0, 5); +} + +function createDeepReverseStream( + owner: string, + repo: string, + usageId: number | null, + repoKey: string, + llm: LlmTarget +): ReadableStream { + const encoder = new TextEncoder(); + + return new ReadableStream({ + async start(controller) { + const send = (event: string, payload: Record) => { + try { + controller.enqueue( + encoder.encode( + `event: ${event}\ndata: ${JSON.stringify(payload)}\n\n` + ) + ); + } catch { + /* client disconnected — swallow */ + } + }; + + const supabase = getSupabase(); + + try { + /* Phase 1: Wiki structure */ + send("progress", { + phase: "structure", + message: "Fetching wiki structure…", + }); + + let structure: string; + try { + structure = await readWikiStructure(owner, repo); + } catch { + /* Repo not indexed — refund the usage attempt */ + if (usageId && supabase) { + await supabase + .from("deep_reverse_usage") + .delete() + .eq("id", usageId); + } + send("error", { + code: "deepwiki_not_indexed", + message: `${owner}/${repo} is not yet indexed by DeepWiki. 
Try again later or use standard reverse.`, + }); + return; + } + + if (!structure || structure.trim().length < 20) { + if (usageId && supabase) { + await supabase + .from("deep_reverse_usage") + .delete() + .eq("id", usageId); + } + send("error", { + code: "deepwiki_not_indexed", + message: `${owner}/${repo} has no DeepWiki documentation yet.`, + }); + return; + } + + /* Phase 2: LLM picks 1-5 key sections */ + send("progress", { + phase: "analysis", + message: "Analyzing structure…", + }); + const analysisMsg = buildAnalysisUserMessage( + owner, + repo, + structure + ); + const sectionsRaw = await callLlm( + llm, + DEEP_ANALYSIS_PROMPT, + analysisMsg, + ANALYSIS_MAX_TOKENS + ); + + let sectionPaths: string[]; + try { + sectionPaths = parseSectionPaths(sectionsRaw); + } catch { + sectionPaths = []; + } + + /* Phase 3: Fetch full wiki and extract selected sections */ + send("progress", { + phase: "details", + message: "Fetching full documentation…", + }); + + let fullWiki: string; + try { + fullWiki = await readWikiContents(owner, repo); + } catch { + /* read_wiki_contents failed — refund and bail */ + if (usageId && supabase) { + await supabase + .from("deep_reverse_usage") + .delete() + .eq("id", usageId); + } + send("error", { + code: "deepwiki_not_indexed", + message: `Could not fetch documentation for ${owner}/${repo}. Try indexing it at deepwiki.com first.`, + }); + return; + } + + const extracted = sectionPaths.length > 0 + ? extractPages(fullWiki, sectionPaths) + : []; + + const sections: Array<{ path: string; content: string }> = extracted.map( + (p) => ({ path: p.title, content: p.content }) + ); + + /* Fallback: if no sections matched, use the full wiki (truncated) */ + if (sections.length === 0) { + const truncated = fullWiki.length > 60_000 + ? 
fullWiki.slice(0, 60_000) + "\n\n… (truncated)" + : fullWiki; + sections.push({ path: "Full Documentation", content: truncated }); + } + + send("progress", { + phase: "details", + message: `Extracted ${sections.length} sections from documentation.`, + }); + + /* Fetch repo metadata from GitHub (best-effort) */ + let meta: { + description: string | null; + language: string | null; + stargazers_count: number; + }; + try { + meta = await getRepoMeta(owner, repo); + } catch { + meta = { description: null, language: null, stargazers_count: 0 }; + } + + /* Phase 4: Synthesize deep prompt */ + send("progress", { + phase: "synthesis", + message: "Generating deep prompt…", + }); + const synthMsg = buildSynthesisUserMessage( + owner, + repo, + meta, + sections + ); + const prompt = await callLlm( + llm, + DEEP_SYNTHESIS_PROMPT, + synthMsg, + DEEP_MAX_TOKENS + ); + + /* Cache the result */ + if (supabase) { + await supabase + .from("deep_prompt_cache") + .upsert( + { + owner, + repo, + prompt, + cached_at: new Date().toISOString(), + }, + { onConflict: "owner,repo" } + ) + .then(({ error }) => { + if (error) { + console.error( + "[deep-reverse] cache upsert:", + error.message + ); + } + }); + } + + send("complete", { prompt }); + } catch (err) { + const message = err instanceof Error ? 
err.message : String(err); + send("error", { message: `Deep reverse failed: ${message}` }); + } finally { + inFlight.delete(repoKey); + controller.close(); + } + }, + }); +} + +export async function GET(request: NextRequest) { + const fp = deriveFingerprint(request); + if (!fp) { + return NextResponse.json({ eligible: false, remaining: 0 }); + } + const eligible = isEligible(fp); + let remaining = MAX_USES_PER_WEEK; + + if (eligible) { + const supabase = getSupabase(); + if (supabase) { + const weekAgo = new Date( + Date.now() - 7 * 24 * 3600_000 + ).toISOString(); + const { count } = await supabase + .from("deep_reverse_usage") + .select("*", { count: "exact", head: true }) + .eq("fingerprint", fp) + .gte("used_at", weekAgo); + remaining = Math.max(0, MAX_USES_PER_WEEK - (count ?? 0)); + } + } + + return NextResponse.json({ eligible, remaining }); +} + +export async function POST(request: NextRequest) { + let body: { repoUrl?: string }; + try { + body = await request.json(); + } catch { + return NextResponse.json({ error: "Invalid JSON body" }, { status: 400 }); + } + + /* Validate input */ + const repoUrl = typeof body.repoUrl === "string" ? body.repoUrl : ""; + const parsed = parseGitHubRepoInput(repoUrl); + if (!parsed) { + return NextResponse.json( + { + error: + "Could not parse a GitHub repo. Use https://github.com/owner/repo or owner/repo.", + }, + { status: 400 } + ); + } + + const fp = deriveFingerprint(request); + if (!fp) { + return NextResponse.json( + { error: "X-Fingerprint header is required" }, + { status: 400 } + ); + } + + const { owner, repo } = parsed; + const repoKey = `${owner}/${repo}`; + + /* Supabase is required for deep reverse (rate limiting + cache) */ + const supabase = getSupabase(); + if (!supabase) { + return NextResponse.json( + { error: "Deep reverse requires database configuration." 
}, + { status: 503 } + ); + } + + /* Eligibility (gradual rollout by fingerprint) */ + if (!isEligible(fp)) { + return NextResponse.json( + { error: "deep_reverse_not_available" }, + { status: 403 } + ); + } + + /* LLM must be configured */ + const llmCheck = resolveLlmTarget(); + if ("error" in llmCheck) { + return NextResponse.json( + { error: llmCheck.error }, + { status: 500 } + ); + } + + /* Cache check */ + try { + const { data } = await supabase + .from("deep_prompt_cache") + .select("prompt, cached_at") + .eq("owner", owner) + .eq("repo", repo) + .maybeSingle(); + + if (data?.prompt && data.cached_at) { + const ageHours = + (Date.now() - new Date(data.cached_at as string).getTime()) / 36e5; + if (ageHours < CACHE_TTL_HOURS) { + return NextResponse.json({ + prompt: data.prompt as string, + cached: true, + }); + } + } + } catch { + /* cache miss — proceed */ + } + + /* In-flight dedup: if someone is already processing this repo, ask client to retry */ + if (inFlight.has(repoKey)) { + return NextResponse.json( + { status: "processing", retryAfter: 5 }, + { status: 202 } + ); + } + + /* Rate limit: 3 deep reverses per week per fingerprint */ + const weekAgo = new Date(Date.now() - 7 * 24 * 3600_000).toISOString(); + const { count } = await supabase + .from("deep_reverse_usage") + .select("*", { count: "exact", head: true }) + .eq("fingerprint", fp) + .gte("used_at", weekAgo); + + if ((count ?? 0) >= MAX_USES_PER_WEEK) { + return NextResponse.json( + { error: "deep_reverse_limit_reached", remaining: 0 }, + { status: 429 } + ); + } + + /* Record usage (refunded if DeepWiki not indexed) */ + const { data: usageRow } = await supabase + .from("deep_reverse_usage") + .insert({ + fingerprint: fp, + ip: + request.headers.get("x-forwarded-for")?.split(",")[0]?.trim() ?? null, + owner, + repo, + used_at: new Date().toISOString(), + }) + .select("id") + .single(); + + const usageId = (usageRow as { id: number } | null)?.id ?? 
null; + + /* Mark in-flight */ + inFlight.add(repoKey); + + /* Return SSE streaming response */ + return new Response( + createDeepReverseStream(owner, repo, usageId, repoKey, llmCheck), + { + headers: { + "Content-Type": "text/event-stream", + "Cache-Control": "no-cache, no-transform", + Connection: "keep-alive", + }, + } + ); +} diff --git a/app/api/library/route.ts b/app/api/library/route.ts index 54c94b2..9836314 100644 --- a/app/api/library/route.ts +++ b/app/api/library/route.ts @@ -8,6 +8,15 @@ const LIMIT = 24; type SortOption = "trending" | "newest" | "oldest"; +interface PromptRow { + id: number; + owner: string; + repo: string; + prompt: string; + cached_at: string; + views?: number; +} + export async function GET(req: NextRequest) { const supabase = getSupabase(); if (!supabase) { @@ -56,5 +65,33 @@ export async function GET(req: NextRequest) { return NextResponse.json({ error: error.message }, { status: 500 }); } - return NextResponse.json({ data: data ?? [], total: count ?? 0 }); + const rows = (data ?? []) as PromptRow[]; + + /* Overlay deep prompts where available (best-effort) */ + if (rows.length > 0) { + try { + const orFilter = rows + .map((r) => `and(owner.eq.${r.owner},repo.eq.${r.repo})`) + .join(","); + const { data: deepRows } = await supabase + .from("deep_prompt_cache") + .select("owner, repo, prompt") + .or(orFilter); + + if (deepRows && deepRows.length > 0) { + const deepMap = new Map(); + for (const d of deepRows as Array<{ owner: string; repo: string; prompt: string }>) { + deepMap.set(`${d.owner}/${d.repo}`, d.prompt); + } + for (const row of rows) { + const deepPrompt = deepMap.get(`${row.owner}/${row.repo}`); + if (deepPrompt) row.prompt = deepPrompt; + } + } + } catch { + /* deep_prompt_cache may not exist yet — ignore */ + } + } + + return NextResponse.json({ data: rows, total: count ?? 
0 }); } diff --git a/app/api/reverse-prompt/route.ts b/app/api/reverse-prompt/route.ts index 8238158..b94c1c3 100644 --- a/app/api/reverse-prompt/route.ts +++ b/app/api/reverse-prompt/route.ts @@ -4,42 +4,15 @@ import { getFileTree, getReadme, getRepoMeta } from "@/lib/github-client"; import { formatAsFilteredTree } from "@/lib/file-tree-formatter"; import { parseGitHubRepoInput } from "@/lib/parse-github-repo"; import { getSupabase } from "@/lib/supabase"; +import { + type LlmTarget, + resolveLlmTarget, + extractProviderErrorMessage, + extractMessage, + buildLlmHeaders, +} from "@/lib/llm-client"; const README_MAX_CHARS = 8000; -const OPENROUTER_URL = "https://openrouter.ai/api/v1/chat/completions"; -const GOOGLE_AI_STUDIO_URL = - "https://generativelanguage.googleapis.com/v1beta/openai/chat/completions"; - -type LlmTarget = - | { provider: "openrouter"; url: string; apiKey: string; model: string } - | { provider: "google"; url: string; apiKey: string; model: string }; - -function resolveLlmTarget(): LlmTarget | { error: string } { - const openRouterKey = process.env.OPENROUTER_API_KEY?.trim(); - if (openRouterKey) { - return { - provider: "openrouter", - url: OPENROUTER_URL, - apiKey: openRouterKey, - model: - process.env.OPENROUTER_MODEL?.trim() || "google/gemini-2.5-pro", - }; - } - const googleKey = process.env.GOOGLE_GENERATIVE_AI_API_KEY?.trim(); - if (googleKey) { - return { - provider: "google", - url: GOOGLE_AI_STUDIO_URL, - apiKey: googleKey, - model: - process.env.GOOGLE_AI_STUDIO_MODEL?.trim() || "gemini-2.5-pro", - }; - } - return { - error: - "No LLM API key configured. 
Set OPENROUTER_API_KEY (recommended) or GOOGLE_GENERATIVE_AI_API_KEY in .env.local.", - }; -} const inFlight = new Map>(); @@ -111,36 +84,6 @@ function isExhaustedCreditsOrQuotaMessage(msg: string): boolean { return false; } -function extractProviderErrorMessage(data: unknown): string | null { - if (!data || typeof data !== "object") return null; - const err = (data as { error?: unknown }).error; - if (typeof err === "string" && err.trim()) return err.trim(); - if (err && typeof err === "object" && "message" in err) { - const m = (err as { message?: unknown }).message; - if (typeof m === "string" && m.trim()) return m.trim(); - } - return null; -} - -function extractMessage(data: unknown): string | null { - if (!data || typeof data !== "object") return null; - const choices = (data as { choices?: unknown }).choices; - if (!Array.isArray(choices) || choices.length === 0) return null; - const first = choices[0] as { message?: { content?: unknown } }; - const content = first.message?.content; - if (typeof content === "string") return content.trim(); - if (Array.isArray(content)) { - const text = content - .map((part) => - part && typeof part === "object" && "text" in part - ? 
String((part as { text: unknown }).text) - : "" - ) - .join(""); - return text.trim() || null; - } - return null; -} export async function POST(request: NextRequest) { let body: { repoUrl?: string }; @@ -253,22 +196,11 @@ export async function POST(request: NextRequest) { tree.truncated ); - const headers: Record = { - Authorization: `Bearer ${llm.apiKey}`, - "Content-Type": "application/json", - }; - if (llm.provider === "openrouter") { - const referer = process.env.OPENROUTER_HTTP_REFERER?.trim(); - if (referer) headers["HTTP-Referer"] = referer; - const title = process.env.OPENROUTER_APP_TITLE?.trim(); - if (title) headers["X-Title"] = title; - } - let res: Response; try { res = await fetch(llm.url, { method: "POST", - headers, + headers: buildLlmHeaders(llm), body: JSON.stringify({ model: llm.model, messages: [ diff --git a/components/reverse-prompt-home.tsx b/components/reverse-prompt-home.tsx index 37b57ef..8b05da8 100644 --- a/components/reverse-prompt-home.tsx +++ b/components/reverse-prompt-home.tsx @@ -4,6 +4,7 @@ import { useCallback, useEffect, useMemo, useRef, useState } from "react"; import Link from "next/link"; import { HOME_EXAMPLES } from "@/lib/home-example-repos"; import { parseGitHubRepoInput } from "@/lib/parse-github-repo"; +import { generateFingerprint } from "@/lib/fingerprint"; type ReversePromptHomeProps = { initialRepoInput?: string; @@ -25,12 +26,87 @@ export function ReversePromptHome({ const resultsRef = useRef(null); const autoSubmitStartedRef = useRef(false); + /* Deep Reverse state */ + const [deepMode, setDeepMode] = useState(false); + const [deepEligible, setDeepEligible] = useState(false); + const [deepRemaining, setDeepRemaining] = useState(0); + const [deepProgress, setDeepProgress] = useState(null); + const [isDeepResult, setIsDeepResult] = useState(false); + const fingerprintRef = useRef(null); + + /* Resolve fingerprint + check deep reverse eligibility on mount */ + useEffect(() => { + let cancelled = false; + (async () => 
{ + try { + const fp = await generateFingerprint(); + fingerprintRef.current = fp; + const res = await fetch("/api/deep-reverse", { + headers: { "X-Fingerprint": fp }, + }); + if (!res.ok || cancelled) return; + const data = (await res.json()) as { eligible: boolean; remaining: number }; + if (cancelled) return; + setDeepEligible(data.eligible); + setDeepRemaining(data.remaining); + } catch { + /* silently fail — deep reverse just stays hidden */ + } + })(); + return () => { cancelled = true; }; + }, []); + + /* Restore last deep reverse result from localStorage on mount */ + useEffect(() => { + if (initialPrompt || prompt) return; + try { + const saved = localStorage.getItem("deepReverseResult"); + if (!saved) return; + const { repo: savedRepo, prompt: savedPrompt } = JSON.parse(saved) as { + repo: string; + prompt: string; + }; + if (savedRepo && savedPrompt) { + setRepoUrl(savedRepo); + setPrompt(savedPrompt); + setIsDeepResult(true); + } + } catch { + /* corrupt localStorage — ignore */ + } + }, []); + + function saveDeepResult(input: string, deepPrompt: string) { + try { + localStorage.setItem( + "deepReverseResult", + JSON.stringify({ repo: input, prompt: deepPrompt }) + ); + } catch { + /* storage full or unavailable — ignore */ + } + } + + function pushRepoUrl(input: string) { + const parsed = parseGitHubRepoInput(input); + if (parsed && typeof window !== "undefined") { + window.history.replaceState( + null, + "", + `/${encodeURIComponent(parsed.owner)}/${encodeURIComponent(parsed.repo)}` + ); + } + } + + /** Standard reverse prompt (existing logic). 
*/ const runReversePrompt = useCallback(async (input: string) => { setError(null); setRateLimited(false); setPrompt(""); setCopied(false); setLoading(true); + setDeepProgress(null); + setIsDeepResult(false); try { const res = await fetch("/api/reverse-prompt", { method: "POST", @@ -51,14 +127,7 @@ export function ReversePromptHome({ } if (typeof data.prompt === "string") { setPrompt(data.prompt); - const parsed = parseGitHubRepoInput(input); - if (parsed && typeof window !== "undefined") { - window.history.replaceState( - null, - "", - `/${encodeURIComponent(parsed.owner)}/${encodeURIComponent(parsed.repo)}` - ); - } + pushRepoUrl(input); } else { setError("No prompt in response."); } @@ -69,9 +138,136 @@ export function ReversePromptHome({ } }, []); + /** Deep reverse via SSE stream. */ + const deepReverseRef = useRef<(input: string) => Promise>(null); + const runDeepReverse = useCallback(async (input: string) => { + setError(null); + setRateLimited(false); + setPrompt(""); + setCopied(false); + setLoading(true); + setDeepProgress("Starting deep analysis…"); + + const fp = fingerprintRef.current; + if (!fp) { + setError("Could not generate browser fingerprint."); + setLoading(false); + setDeepProgress(null); + return; + } + + try { + const res = await fetch("/api/deep-reverse", { + method: "POST", + headers: { + "Content-Type": "application/json", + "X-Fingerprint": fp, + }, + body: JSON.stringify({ repoUrl: input }), + }); + + /* Non-streaming responses (cached result, errors, 202 in-flight) */ + const ct = res.headers.get("content-type") ?? 
""; + if (!ct.includes("text/event-stream")) { + const data = (await res.json()) as { + prompt?: string; + cached?: boolean; + error?: string; + status?: string; + retryAfter?: number; + remaining?: number; + }; + if (data.prompt) { + setPrompt(data.prompt); + setIsDeepResult(true); + setDeepProgress(null); + pushRepoUrl(input); + return; + } + if (data.status === "processing") { + setDeepProgress("Another user is analyzing this repo — waiting…"); + /* Retry after a delay */ + setTimeout(() => void deepReverseRef.current?.(input), (data.retryAfter ?? 5) * 1000); + return; + } + if (res.status === 429) { + setError("You've reached the limit of 3 deep analyses per week."); + setDeepRemaining(0); + setDeepProgress(null); + return; + } + if (res.status === 403 && data.error === "deep_reverse_not_available") { + setError("Deep reverse is not available for your session yet."); + setDeepProgress(null); + return; + } + setError(data.error ?? `Request failed (${res.status})`); + setDeepProgress(null); + return; + } + + /* SSE stream processing */ + const reader = res.body?.getReader(); + if (!reader) { + setError("Failed to read response stream."); + setDeepProgress(null); + return; + } + + const decoder = new TextDecoder(); + let buffer = ""; + + while (true) { + const { done, value } = await reader.read(); + if (done) break; + + buffer += decoder.decode(value, { stream: true }); + const lines = buffer.split("\n"); + buffer = lines.pop() ?? 
""; + + let currentEvent = ""; + for (const line of lines) { + if (line.startsWith("event: ")) { + currentEvent = line.slice(7).trim(); + } else if (line.startsWith("data: ") && currentEvent) { + try { + const payload = JSON.parse(line.slice(6)) as Record; + if (currentEvent === "progress") { + setDeepProgress(payload.message as string); + } else if (currentEvent === "complete") { + setPrompt(payload.prompt as string); + setIsDeepResult(true); + setDeepProgress(null); + setDeepRemaining((prev) => Math.max(0, prev - 1)); + pushRepoUrl(input); + } else if (currentEvent === "error") { + setError(payload.message as string); + setDeepProgress(null); + } + } catch { + /* malformed SSE data line */ + } + currentEvent = ""; + } + } + } + } catch (err) { + setError(err instanceof Error ? err.message : "Deep reverse failed"); + setDeepProgress(null); + } finally { + setLoading(false); + } + }, []); + deepReverseRef.current = runDeepReverse; + function onSubmit(e: React.FormEvent) { e.preventDefault(); - void runReversePrompt(repoUrl.trim()); + const trimmed = repoUrl.trim(); + if (deepMode) { + void runDeepReverse(trimmed); + } else { + void runReversePrompt(trimmed); + } } useEffect(() => { @@ -287,6 +483,90 @@ export function ReversePromptHome({ ))} + {/* Deep Reverse toggle */} + {deepEligible ? ( +
+
+
+ + Deep Reverse + + {(() => { + const parsed = parseGitHubRepoInput(repoUrl); + if (!parsed) return null; + return ( + + + DeepWiki + + ); + })()} +
+ + Detailed prompt via DeepWiki analysis + {deepRemaining > 0 + ? ` · ${deepRemaining} left this week` + : " · limit reached · cached results still available"} + +
+ +
+ ) : null} + + {/* Deep Reverse progress indicator */} + {deepProgress ? ( +
+ + + + + + {deepProgress} + +
+ ) : null} + {rateLimited ? (

Sorry, we're a bit overwhelmed right now.

@@ -334,7 +614,7 @@ export function ReversePromptHome({

- Reverse engineered prompt + {isDeepResult ? "Deep reverse engineered prompt" : "Reverse engineered prompt"}

{reverseEngineeredRepo ? ( diff --git a/lib/deep-reverse-prompts.ts b/lib/deep-reverse-prompts.ts new file mode 100644 index 0000000..ccaf897 --- /dev/null +++ b/lib/deep-reverse-prompts.ts @@ -0,0 +1,107 @@ +/** + * System prompts for the deep reverse multi-phase pipeline. + */ + +export const DEEP_ANALYSIS_PROMPT = `You are an expert software architect analyzing a repository's documentation structure. + +## Task + +You are given the **wiki structure** (table of contents) of a GitHub repository generated by DeepWiki. Identify the **1 to 5 most important sections** needed to deeply understand and faithfully recreate this project. + +## Selection criteria + +Pick sections that cover: +- **Architecture overview** — system structure, main components, data flow +- **Core business logic** — primary functionality, algorithms, domain rules +- **Data models / API design** — schemas, endpoints, interfaces, protocols +- **Key integrations** — external services, databases, important libraries +- **Configuration / deployment** — build pipeline, environment, infrastructure + +Skip sections about: contributing guidelines, code of conduct, changelog, license, generic "getting started" that only shows install commands. + +## Output format + +Reply with **only** a JSON array of strings — the section paths or identifiers exactly as they appear in the structure you received. No markdown fences, no explanation, no surrounding text. + +Example: ["1-architecture-overview","3.2-api-design","4-data-models"] +`; + +export const DEEP_SYNTHESIS_PROMPT = `You are an expert at crafting detailed, actionable prompts for AI coding agents. + +## Task + +You are given **comprehensive documentation** about a GitHub repository — its architecture, core logic, data models, APIs, and key implementation details gathered from DeepWiki. Write **one detailed prompt** that a developer could paste into Cursor, Claude Code, Codex, or a similar tool to get this project recreated as faithfully as possible. 
+ +## What the output must be + +- **Structured and detailed.** Cover architecture, tech stack, data models, API design, core algorithms, UI/UX patterns, and key implementation details. +- **Actionable.** A skilled AI coding agent should be able to start building from this prompt without guessing. +- **Honest scope.** Only describe what the documentation supports. Do not invent features. +- **Length:** approximately **2000 to 4000 words.** Use sections, but keep it as a single cohesive prompt — not a spec document. +- **Tone:** direct, technical, like a senior engineer briefing a team. Use "Build me…", "The system should…", "Implement…" language. + +## Structure suggestion + +1. **Project overview** — what it does, who it's for, core value proposition +2. **Tech stack** — frameworks, languages, key dependencies with versions where known +3. **Architecture** — high-level structure, main modules/services, data flow diagrams in words +4. **Data models** — schemas, relationships, key entities and their fields +5. **Core features** — detailed description of each major feature with behavior specs +6. **API design** — endpoints, request/response patterns, authentication +7. **UI/UX** — layout, key screens, interaction patterns (if applicable) +8. **Configuration & deployment** — environment variables, build steps, deploy targets + +## What to avoid + +- Vague hand-waving ("make it scalable", "use best practices") +- Dumping raw documentation — synthesize and restructure into a prompt +- Including contributor guidelines, license info, or meta-documentation +- Markdown code blocks with actual source code — describe the patterns instead +- Preamble like "Sure, here is…" or meta like "As an AI…" + +## Output format + +Reply with **only** the prompt text. No title, no wrapping quotes, no explanation before or after. +`; + +/** Build the user message for the analysis phase (LLM picks sections). 
*/ +export function buildAnalysisUserMessage( + owner: string, + repo: string, + wikiStructure: string +): string { + return [ + `# Repository: ${owner}/${repo}`, + "", + "## Wiki Structure (from DeepWiki)", + "", + wikiStructure, + ].join("\n"); +} + +/** Build the user message for the synthesis phase (LLM generates deep prompt). */ +export function buildSynthesisUserMessage( + owner: string, + repo: string, + meta: { + description: string | null; + language: string | null; + stargazers_count: number; + }, + sections: Array<{ path: string; content: string }> +): string { + const header = [ + `# Repository: ${owner}/${repo}`, + "", + `**Description:** ${meta.description ?? "*(none)*"}`, + `**Primary language:** ${meta.language ?? "*(unknown)*"}`, + `**Stars:** ${meta.stargazers_count}`, + "", + ].join("\n"); + + const body = sections + .map((s) => `## ${s.path}\n\n${s.content}`) + .join("\n\n---\n\n"); + + return header + body; +} diff --git a/lib/deepwiki-client.ts b/lib/deepwiki-client.ts new file mode 100644 index 0000000..3cdc586 --- /dev/null +++ b/lib/deepwiki-client.ts @@ -0,0 +1,172 @@ +/** + * DeepWiki MCP client — Streamable HTTP transport (JSON-RPC 2.0). + */ + +const MCP_URL = + process.env.DEEPWIKI_MCP_URL?.trim() || "https://mcp.deepwiki.com/mcp"; +const TIMEOUT_MS = 60_000; +const MAX_RETRIES = 2; +const RETRY_DELAY_MS = 300; + +interface JsonRpcRequest { + jsonrpc: "2.0"; + id: number; + method: string; + params?: Record; +} + +interface JsonRpcError { + code: number; + message: string; + data?: unknown; +} + +interface JsonRpcResponse { + jsonrpc: "2.0"; + id?: number; + result?: unknown; + error?: JsonRpcError; +} + +interface McpToolResult { + content: Array<{ type: string; text?: string }>; + isError?: boolean; +} + +let seqId = 0; + +/** Extract the last JSON-RPC response from an SSE text stream. 
*/
+function lastJsonRpcFromSSE(raw: string): JsonRpcResponse | null {
+  let last: JsonRpcResponse | null = null;
+  // SSE lines may end with LF, CRLF, or a bare CR.
+  for (const line of raw.split(/\r\n|\r|\n/)) {
+    // The single space after "data:" is optional on the wire.
+    if (!line.startsWith("data:")) continue;
+    try {
+      // JSON.parse ignores the optional leading space left after the prefix.
+      const obj = JSON.parse(line.slice(5)) as JsonRpcResponse | null;
+      if (obj?.jsonrpc === "2.0") last = obj;
+    } catch {
+      /* skip malformed SSE lines */
+    }
+  }
+  return last;
+}
+
+/** Parse a plain JSON body as a JSON-RPC response; null when it is not valid JSON. */
+function parseJsonRpc(raw: string): JsonRpcResponse | null {
+  try {
+    return JSON.parse(raw) as JsonRpcResponse | null;
+  } catch {
+    return null;
+  }
+}
+
+/**
+ * Send a JSON-RPC `tools/call` request and return the text response.
+ *
+ * Makes up to `MAX_RETRIES + 1` attempts and waits
+ * `RETRY_DELAY_MS * (attempt + 1)` between them. Every failure is retried,
+ * including JSON-RPC errors and tool-level errors.
+ *
+ * @param tool - Name of the MCP tool to invoke.
+ * @param args - Arguments object forwarded to the tool.
+ * @returns All non-empty `text` content parts joined with newlines.
+ * @throws The error raised by the final attempt.
+ */
+async function callTool(
+  tool: string,
+  args: Record<string, unknown>
+): Promise<string> {
+  let lastErr: Error | null = null;
+
+  for (let attempt = 0; attempt <= MAX_RETRIES; attempt++) {
+    try {
+      const body: JsonRpcRequest = {
+        jsonrpc: "2.0",
+        id: ++seqId,
+        method: "tools/call",
+        params: { name: tool, arguments: args },
+      };
+
+      const ac = new AbortController();
+      const timer = setTimeout(() => ac.abort(), TIMEOUT_MS);
+
+      let rpc: JsonRpcResponse | null;
+      try {
+        const res = await fetch(MCP_URL, {
+          method: "POST",
+          headers: {
+            "Content-Type": "application/json",
+            Accept: "application/json, text/event-stream",
+          },
+          body: JSON.stringify(body),
+          signal: ac.signal,
+        });
+
+        // Read the body while the abort timer is still armed, so a stalled
+        // response stream cannot outlive TIMEOUT_MS.
+        const raw = await res.text();
+        const ct = res.headers.get("content-type") ?? "";
+        rpc = ct.includes("text/event-stream")
+          ? lastJsonRpcFromSSE(raw)
+          : parseJsonRpc(raw);
+
+        // Prefer the JSON-RPC error when the server sent one; otherwise
+        // report the HTTP status instead of a JSON parse failure.
+        if (!res.ok && !rpc?.error) {
+          throw new Error(`DeepWiki request failed with HTTP ${res.status}`);
+        }
+      } finally {
+        clearTimeout(timer);
+      }
+
+      if (rpc?.error) throw new Error(rpc.error.message);
+
+      const result = rpc?.result as McpToolResult | undefined;
+      if (!result?.content) throw new Error("Empty tool response");
+
+      if (result.isError) {
+        const msg = result.content.map((c) => c.text ?? "").join("\n");
+        throw new Error(msg || "Tool returned an error");
+      }
+
+      const parts: string[] = [];
+      for (const c of result.content) {
+        if (c.type === "text" && c.text) parts.push(c.text);
+      }
+      return parts.join("\n");
+    } catch (err) {
+      lastErr = err instanceof Error ? err : new Error(String(err));
+      if (attempt < MAX_RETRIES) {
+        await new Promise((r) => setTimeout(r, RETRY_DELAY_MS * (attempt + 1)));
+      }
+    }
+  }
+
+  throw lastErr ?? new Error("DeepWiki tool call failed");
+}
+
+/** Lower-case phrases that mark a successful-looking response as an error. */
+const ERROR_MARKERS = [
+  "repository not found",
+  "to index it",
+  "error fetching wiki",
+  "error processing question",
+] as const;
+
+/**
+ * Check if a DeepWiki response is actually an error disguised as success.
+ *
+ * @throws Error carrying the first non-blank line of `text` when any marker
+ *   phrase occurs anywhere in it (case-insensitive).
+ */
+function assertNotErrorText(text: string): void {
+  // NOTE(review): the whole text is scanned, so a wiki that legitimately
+  // contains one of these phrases would be rejected — confirm acceptable.
+  const lower = text.toLowerCase();
+  if (!ERROR_MARKERS.some((marker) => lower.includes(marker))) return;
+
+  // split() never returns an empty array, so pick the first non-blank line
+  // and only then fall back to the generic message.
+  const firstLine = text
+    .split("\n")
+    .map((line) => line.trim())
+    .find((line) => line !== "");
+  throw new Error(firstLine || "DeepWiki returned an error");
+}
+
+/**
+ * Fetch the wiki table of contents for a repository.
+ *
+ * @throws When the tool call fails or the response text looks like an error.
+ */
+export async function readWikiStructure(owner: string, repo: string): Promise<string> {
+  const result = await callTool("read_wiki_structure", {
+    repoName: `${owner}/${repo}`,
+  });
+  assertNotErrorText(result);
+  return result;
+}
+
+/**
+ * Fetch the full wiki documentation and return all pages.
+ *
+ * @throws When the tool call fails or the response text looks like an error.
+ */
+export async function readWikiContents(owner: string, repo: string): Promise<string> {
+  const result = await callTool("read_wiki_contents", {
+    repoName: `${owner}/${repo}`,
+  });
+  assertNotErrorText(result);
+  return result;
+}
+
+/**
+ * Extract specific pages from full wiki text by matching page titles.
+ *
+ * Pages are the chunks that start with a `# Page: <title>` line. For each
+ * requested title an exact case-insensitive match is preferred; otherwise the
+ * first page whose title contains the request, or is contained in it, is
+ * used. Blank requests, untitled pages, and text before the first page never
+ * match, and a page is returned at most once.
+ *
+ * @param fullWiki - Complete wiki text as returned by `readWikiContents`.
+ * @param pageTitles - Titles to look up, in the desired output order.
+ * @returns Matched pages in request order; unmatched titles are omitted.
+ */
+export function extractPages(
+  fullWiki: string,
+  pageTitles: string[]
+): Array<{ title: string; content: string }> {
+  const pages = fullWiki
+    // Split only where a line begins with the page marker.
+    .split(/^(?=# Page: )/m)
+    // Drop any preamble that precedes the first page.
+    .filter((chunk) => chunk.startsWith("# Page: "))
+    .map((chunk) => {
+      const firstLine = chunk.split("\n")[0] ?? "";
+      const title = firstLine.replace(/^# Page:\s*/, "").trim();
+      return {
+        title,
+        key: title.toLowerCase(),
+        content: chunk.slice(firstLine.length).trim(),
+      };
+    })
+    // An empty key would satisfy every `includes` check below.
+    .filter((page) => page.key !== "");
+
+  const results: Array<{ title: string; content: string }> = [];
+  const used = new Set<(typeof pages)[number]>();
+
+  for (const requested of pageTitles) {
+    const wanted = requested.toLowerCase().trim();
+    if (wanted === "") continue;
+
+    const match =
+      pages.find((page) => page.key === wanted) ??
+      pages.find(
+        (page) => page.key.includes(wanted) || wanted.includes(page.key)
+      );
+    if (!match || used.has(match)) continue;
+
+    used.add(match);
+    results.push({ title: match.title, content: match.content });
+  }
+
+  return results;
+}
diff --git a/lib/fingerprint-server.ts b/lib/fingerprint-server.ts
new file mode 100644
index 0000000..1fc0080
--- /dev/null
+++ b/lib/fingerprint-server.ts
@@ -0,0 +1,23 @@
+/**
+ * Server-side combined fingerprint: merges request signals with client-side hash.
+ */
+
+import { createHash } from "crypto";
+import type { NextRequest } from "next/server";
+
+/**
+ * Derive a combined fingerprint from server signals + client-provided hash.
+ *
+ * @returns SHA-256 hex digest of `ip|user-agent|accept-language|clientHash`,
+ *   or null if the X-Fingerprint header is missing or empty.
+ */
+export function deriveFingerprint(request: NextRequest): string | null {
+  const clientFp = request.headers.get("x-fingerprint");
+  if (!clientFp) return null;
+
+  // `||` rather than `??`: an empty first X-Forwarded-For entry has to fall
+  // through to X-Real-IP instead of being used as the address.
+  const ip =
+    request.headers.get("x-forwarded-for")?.split(",")[0]?.trim() ||
+    request.headers.get("x-real-ip") ||
+    "unknown";
+  const ua = request.headers.get("user-agent") ?? "";
+  const lang = request.headers.get("accept-language") ?? "";
+
+  return createHash("sha256")
+    .update([ip, ua, lang, clientFp].join("|"))
+    .digest("hex");
+}
diff --git a/lib/fingerprint.ts b/lib/fingerprint.ts
new file mode 100644
index 0000000..168ea40
--- /dev/null
+++ b/lib/fingerprint.ts
@@ -0,0 +1,24 @@
+/**
+ * Client-side browser fingerprint for anonymous session tracking.
+ * Combines stable browser signals into a SHA-256 hash.
+ */
+
+/**
+ * Hash user agent, language, screen size, time zone, hardware concurrency and
+ * colour depth into one identifier. Reads `navigator`, `screen` and
+ * `crypto.subtle`.
+ *
+ * @returns Lower-case hex SHA-256 digest of the `|`-joined signals.
+ */
+export async function generateFingerprint(): Promise<string> {
+  const signals = [
+    navigator.userAgent,
+    navigator.language,
+    `${screen.width}x${screen.height}`,
+    Intl.DateTimeFormat().resolvedOptions().timeZone,
+    String(navigator.hardwareConcurrency ?? 0),
+    String(screen.colorDepth ?? 0),
+  ].join("|");
+
+  const hash = await crypto.subtle.digest(
+    "SHA-256",
+    new TextEncoder().encode(signals)
+  );
+
+  // Render every digest byte as two hex characters.
+  return Array.from(new Uint8Array(hash))
+    .map((b) => b.toString(16).padStart(2, "0"))
+    .join("");
+}
diff --git a/lib/llm-client.ts b/lib/llm-client.ts
new file mode 100644
index 0000000..b567e65
--- /dev/null
+++ b/lib/llm-client.ts
@@ -0,0 +1,130 @@
+/**
+ * Shared LLM client — provider resolution, request execution, response parsing.
+ */
+
+const OPENROUTER_URL = "https://openrouter.ai/api/v1/chat/completions";
+const GOOGLE_AI_STUDIO_URL =
+  "https://generativelanguage.googleapis.com/v1beta/openai/chat/completions";
+
+/** A resolved provider: endpoint URL, API key, and model name. */
+export type LlmTarget =
+  | { provider: "openrouter"; url: string; apiKey: string; model: string }
+  | { provider: "google"; url: string; apiKey: string; model: string };
+
+/**
+ * Resolve the configured LLM provider from env vars.
+ *
+ * OPENROUTER_API_KEY takes precedence over GOOGLE_GENERATIVE_AI_API_KEY; each
+ * provider's model falls back to a Gemini 2.5 Pro default when its model
+ * variable is unset or blank.
+ *
+ * @returns The resolved target, or `{ error }` when neither key is set.
+ */
+export function resolveLlmTarget(): LlmTarget | { error: string } {
+  const openRouterKey = process.env.OPENROUTER_API_KEY?.trim();
+  if (openRouterKey) {
+    return {
+      provider: "openrouter",
+      url: OPENROUTER_URL,
+      apiKey: openRouterKey,
+      model:
+        process.env.OPENROUTER_MODEL?.trim() || "google/gemini-2.5-pro",
+    };
+  }
+  const googleKey = process.env.GOOGLE_GENERATIVE_AI_API_KEY?.trim();
+  if (googleKey) {
+    return {
+      provider: "google",
+      url: GOOGLE_AI_STUDIO_URL,
+      apiKey: googleKey,
+      model:
+        process.env.GOOGLE_AI_STUDIO_MODEL?.trim() || "gemini-2.5-pro",
+    };
+  }
+  return {
+    error:
+      "No LLM API key configured. Set OPENROUTER_API_KEY (recommended) or GOOGLE_GENERATIVE_AI_API_KEY in .env.local.",
+  };
+}
+
+/** Extract the top-level error message from an LLM provider response body.
*/
+export function extractProviderErrorMessage(data: unknown): string | null {
+  if (!data || typeof data !== "object") return null;
+  const err = (data as { error?: unknown }).error;
+  // `error` given directly as a non-blank string.
+  if (typeof err === "string" && err.trim()) return err.trim();
+  // `error` given as an object with a non-blank string `message`.
+  if (err && typeof err === "object" && "message" in err) {
+    const m = (err as { message?: unknown }).message;
+    if (typeof m === "string" && m.trim()) return m.trim();
+  }
+  return null;
+}
+
+/**
+ * Extract the assistant text from an OpenAI-compatible chat response.
+ *
+ * Reads `choices[0].message.content`, which may be a string or an array of
+ * parts; only parts with a string `text` contribute to the result.
+ *
+ * @returns The trimmed text, or null when the body is malformed or the text
+ *   is empty after trimming.
+ */
+export function extractMessage(data: unknown): string | null {
+  if (!data || typeof data !== "object") return null;
+  const choices = (data as { choices?: unknown }).choices;
+  if (!Array.isArray(choices) || choices.length === 0) return null;
+
+  // Guard each level: the first choice or its message may be null.
+  const first: unknown = choices[0];
+  if (!first || typeof first !== "object") return null;
+  const message = (first as { message?: unknown }).message;
+  if (!message || typeof message !== "object") return null;
+
+  const content = (message as { content?: unknown }).content;
+  if (typeof content === "string") return content.trim() || null;
+  if (Array.isArray(content)) {
+    const text = content
+      .map((part: unknown) => {
+        if (!part || typeof part !== "object") return "";
+        const value = (part as { text?: unknown }).text;
+        // Skip non-string values instead of stringifying them.
+        return typeof value === "string" ? value : "";
+      })
+      .join("");
+    return text.trim() || null;
+  }
+  return null;
+}
+
+/**
+ * Build authorization headers for the resolved LLM target.
+ *
+ * For OpenRouter, `HTTP-Referer` and `X-Title` are added when
+ * OPENROUTER_HTTP_REFERER / OPENROUTER_APP_TITLE are set to non-blank values.
+ */
+export function buildLlmHeaders(llm: LlmTarget): Record<string, string> {
+  const headers: Record<string, string> = {
+    Authorization: `Bearer ${llm.apiKey}`,
+    "Content-Type": "application/json",
+  };
+  if (llm.provider === "openrouter") {
+    const ref = process.env.OPENROUTER_HTTP_REFERER?.trim();
+    if (ref) headers["HTTP-Referer"] = ref;
+    const title = process.env.OPENROUTER_APP_TITLE?.trim();
+    if (title) headers["X-Title"] = title;
+  }
+  return headers;
+}
+
+/**
+ * Send a chat completion request and return the assistant message text.
+ * Throws on network or provider errors.
+ */
+export async function callLlm(
+  llm: LlmTarget,
+  system: string,
+  user: string,
+  maxTokens?: number
+): Promise<string> {
+  const body: Record<string, unknown> = {
+    model: llm.model,
+    messages: [
+      { role: "system", content: system },
+      { role: "user", content: user },
+    ],
+  };
+  /* Google AI Studio ignores max_tokens and returns empty content when set.
+     Output length is guided by the system prompt instead. */
+  if (maxTokens && llm.provider === "openrouter") {
+    body.max_tokens = maxTokens;
+  }
+
+  const res = await fetch(llm.url, {
+    method: "POST",
+    headers: buildLlmHeaders(llm),
+    body: JSON.stringify(body),
+  });
+
+  // A body that is not valid JSON is treated as "no data", so the caller
+  // gets one of the messages below instead of a raw JSON parse error.
+  const data: unknown = await res.json().catch(() => null);
+
+  if (!res.ok) {
+    throw new Error(
+      extractProviderErrorMessage(data) ?? `LLM error ${res.status}`
+    );
+  }
+
+  const text = extractMessage(data);
+  if (!text) {
+    // Report a provider error object carried in a successful response, if
+    // there is one, before falling back to the generic message.
+    throw new Error(
+      extractProviderErrorMessage(data) ??
+        "Model did not return a usable text response."
+    );
+  }
+  return text;
+}