ちょっとバックアップ

2026-02-10 08:59:49 +09:00 · 2026-02-10 08:59:49 +09:00 · dea5747737
commit dea5747737
parent efb48dc746
2 changed files with 321 additions and 0 deletions
--- a/README.old.md
+++ b/README.old.md
@ -0,0 +1,80 @@
 これまでの試行錯誤と、この「凄そうなツール」に詰め込んだ機能をすべて盛り込んだ `README.md` を作成しました。
 このプロキシは、単なる通信の中継器ではなく、**「リモートサーバーの限界を可視化し、Clineを快適に動かすための管制塔」**としての役割を担っています。
 ---
 ## README.md
 # Ollama Debugging Proxy (oproxy)
 リモートサーバー上の Ollama とローカルの Cline を繋ぎ、通信状態やサーバーリソース、モデルの適合性をリアルタイムに可視化するための高機能プロキシツールです。
 ### 🌟 主な機能
 * **戦力分析 (Startup Scan)**: 起動時にリモートの全モデルをスキャンし、以下の2点から「Clineで動くか」を自動判定します。
 * **メモリ判定**: サーバーの空きメモリ（16.8 GiB基準）に収まるか。
 * **ツール判定**: Clineの操作に必要な `Tools (Function Calling)` に対応しているか。
 * **通信の可視化 (Real-time Logs)**:
 * `^`: リクエスト送信（Cline → Proxy → Ollama）
 * `v`: レスポンス受信（Ollama → Proxy → Cline）
 * `|` / `*`: パケットの区切りと完了を視認。
 * **タイムスタンプ**: ミリ秒単位のログで、遅延が発生している箇所を特定。
 * **エラー翻訳**: Ollamaが返す不親切な JSON エラーを解析し、Clineのチャット画面上に「何が原因で、どう対策すべきか」の Markdown レポートとして流し込みます。
 * **物理的表示の安定性**: 絵文字の幅によるズレを排除した「背景色付きバッジ」システムにより、ターミナル上での完璧な整列を実現。
 ---
 ### 🚀 使い方
 #### 1. 起動
 ```bash
 python oproxy.py -r 11433 -l 11434
 ```
 * `-r`: リモートの Ollama ポート（SSHトンネル等のポート）
 * `-l`: Cline が接続するローカルポート
 #### 2. リストの確認
 起動時に表示されるリストで **`READY`** と出ているモデルを確認してください。
 * `READY` (緑): メモリ・機能ともに合格。
 * `TOOL` (黄): メモリは足りるが、ファイル操作等ができない可能性あり。
 * `MEM` (赤): メモリ不足でロードに失敗します。
 #### 3. Cline の設定
 Cline の設定画面で `Base URL` を `http://127.0.0.1:11434` に設定し、リストで確認した最適なモデル名を入力してください。
 ---
 ### 🛠 必要要件
 * **Python**: 3.12以上推奨
 * **依存ライブラリ**:
 * `fastapi`
 * `uvicorn`
 * `httpx`
 ---
 ### 🎨 ログ表示の見方
 ```text
 [HH:MM:SS.ms] READY [TOOL] モデル名  (サイズ) GiB
 [HH:MM:SS.ms] /api/chat: ^^^^|v:vvvvvvvvv*
 ```
 * `^` が出たまま止まる場合：アップロード（テザリング等の上り）がボトルネック。
 * `|` の後に `v` がなかなか出ない場合：サーバー側の推論待ち、またはメモリのロード中。
 * `v` が少しずつ出る場合：ストリーミング中。
--- a/oproxy.py.py
+++ b/oproxy.py.py
@ -0,0 +1,241 @@
 # バージョン情報: Python 3.12+ / FastAPI 0.115.0 / uvicorn 0.30.0 / httpx 0.28.0
 import argparse
 import asyncio
 import json
 import os
 import sys
 import threading
 import unicodedata
 from contextlib import asynccontextmanager
 from datetime import datetime
 import httpx
 import uvicorn
 from fastapi import FastAPI, Request
 from starlette.responses import StreamingResponse
 C_GRAY, C_CYAN, C_GREEN, C_YELLOW, C_RED, C_WHITE, C_RESET = (
    "\033[90m",
    "\033[96m",
    "\033[92m",
    "\033[93m",
    "\033[91m",
    "\033[97m",
    "\033[0m",
 )
 CONFIG = {
    "remote_port": 11430,
    "url": "http://127.0.0.1:11430",
    "timeout": httpx.Timeout(None),
    "loop": None,
    "models_cache": [],
 }
 def get_ts():
    ts = datetime.now().strftime("%H:%M:%S.%f")[:-3]
    return f"{C_GRAY}[{ts}] [:{CONFIG['remote_port']}]{C_RESET}"
 def get_width(text):
    count = 0
    for c in text:
        if unicodedata.east_asian_width(c) in "FWA":
            count += 2
        else:
            count += 1
    return count
 def pad_text(text, target_width):
    return text + (" " * max(0, target_width - get_width(text)))
 def pulse(char, color=C_RESET):
    print(f"{color}{char}{C_RESET}", end="", flush=True)
@asynccontextmanager
 async def lifespan(app: FastAPI):
    CONFIG["loop"] = asyncio.get_running_loop()
    asyncio.create_task(update_model_cache())
    yield
 app = FastAPI(lifespan=lifespan)
 # --- ロジック：モデルリスト取得 ---
 async def update_model_cache():
    try:
        async with httpx.AsyncClient(timeout=10.0) as client:
            res = await client.get(f"{CONFIG['url']}/api/tags")
            if res.status_code == 200:
                new_data = []
                for m in res.json().get("models", []):
                    # ツールサポートの簡易判定
                    has_tool = False
                    try:
                        s = await client.post(
                            f"{CONFIG['url']}/api/show", json={"name": m["name"]}
                        )
                        info = s.json()
                        details = str(info.get("template", "")) + str(
                            info.get("details", "")
                        )
                        has_tool = any(
                            w in details.lower() for w in ["tool", "functions"]
                        )
                    except:
                        pass
                    new_data.append(
                        {
                            "name": m["name"],
                            "size": m["size"] / (1024**3),
                            "tool": has_tool,
                        }
                    )
                CONFIG["models_cache"] = new_data
    except:
        pass
 def show_help():
    print(
        f"\n{get_ts()} {C_WHITE}>>> h:HELP l:LIST ll:DETAIL s:VRAM [digit]:PORT q:EXIT <<<{C_RESET}",
        flush=True,
    )
 def display_models(full=False, short=False):
    if not CONFIG["models_cache"] or short:
        print(
            f"\n{get_ts()} {C_YELLOW}Cache is empty. Ollama may be offline.{C_RESET}",
            flush=True,
        )
        return
    print(
        f"\n{get_ts()} {C_GREEN}--- Models ({'Detailed' if full else 'Short'}) ---{C_RESET}",
        flush=True,
    )
    NAME_W = 55
    for m in CONFIG["models_cache"]:
        icon = "❌" if m["size"] > 16.8 else ("✅" if m["tool"] else "⚠️")
        tag = f"{C_CYAN}[T]{C_RESET}" if m["tool"] else f"{C_GRAY}[-]{C_RESET}"
        if full:
            print(f"{get_ts()} {icon} {tag} {C_WHITE}{m['name']}{C_RESET}")
            print(f"{get_ts()}    {C_GRAY}└─ {m['size']:>6.1f} GiB{C_RESET}")
        else:
            n = m["name"]
            if get_width(n) > NAME_W:
                while get_width("..." + n) > NAME_W:
                    n = n[1:]
                n = "..." + n
            print(
                f"{get_ts()} {icon} {tag} {C_WHITE}{pad_text(n, NAME_W)}{C_RESET} {C_CYAN}{m['size']:>6.1f} GiB{C_RESET}"
            )
    print(f"{get_ts()} {C_GREEN}--- End ---{C_RESET}\n", flush=True)
 # --- Proxy 本体 ---
@app.api_route("/{path:path}", methods=["GET", "POST", "PUT", "DELETE"])
 async def sticky_proxy(path: str, request: Request):
    print(f"\n{get_ts()} {C_WHITE}/{path}{C_RESET} ", end="", flush=True)
    body = await request.body()
    # リクエストの長さに応じてインジケータを出す
    for _ in range(min(len(body) // 256 + 1, 5)):
        pulse("^", C_CYAN)
    pulse("|", C_YELLOW)
    async def stream_response():
        # 接続エラーを回避するためにリクエストごとにClientを生成
        async with httpx.AsyncClient(
            timeout=CONFIG["timeout"], base_url="http://127.0.0.1:11432"
        ) as client:
            try:
                # 宛先を強制的に 127.0.0.1 に固定したURLで構築
                target_url = f"{CONFIG['url']}/{path}"
                async with client.stream(
                    request.method,
                    target_url,
                    content=body,
                    headers={
                        k: v
                        for k, v in request.headers.items()
                        if k.lower() not in ["host", "content-length"]
                    },
                ) as response:
                    pulse("v", C_GREEN)
                    async for chunk in response.aiter_bytes():
                        pulse("v", C_GREEN)
                        yield chunk
                    pulse("*", C_YELLOW)
            except Exception as e:
                print(f" {C_RED}[Err] {type(e).__name__}: {e}{C_RESET}", flush=True)
            finally:
                print("", flush=True)
    return StreamingResponse(stream_response())
 def input_thread():
    while True:
        try:
            line = sys.stdin.readline()
            if not line:
                break
            cmd = line.strip().lower()
            if cmd == "q":
                os._exit(0)
            elif cmd == "h":
                show_help()
            elif cmd == "l":
                display_models(full=True)
            elif cmd == "lll":
                display_models(full=False)
            elif cmd == "s":
                async def ps():
                    async with httpx.AsyncClient() as c:
                        r = await c.get(f"{CONFIG['url']}/api/ps")
                        if r.status_code == 200:
                            print(f"\n{get_ts()} {C_CYAN}--- VRAM ---{C_RESET}")
                            for m in r.json().get("models", []):
                                print(
                                    f"{get_ts()} {m['name']:<25} {m['size_vram'] / (1024**3):.1f}G"
                                )
                if CONFIG["loop"]:
                    asyncio.run_coroutine_threadsafe(ps(), CONFIG["loop"])
            elif cmd.isdigit():
                p = int(cmd)
                CONFIG["remote_port"], CONFIG["url"] = p, f"http://127.0.0.1:{p}"
                print(
                    f"\n{get_ts()} {C_YELLOW}Switch Target -> {CONFIG['url']}{C_RESET}"
                )
                if CONFIG["loop"]:
                    asyncio.run_coroutine_threadsafe(
                        update_model_cache(), CONFIG["loop"]
                    )
        except:
            break
 if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "-r", "--remote", type=int, default=11432
    )  # デフォルトを11432に
    parser.add_argument("-l", "--local", type=int, default=11434)
    args = parser.parse_args()
    CONFIG["remote_port"] = args.remote
    CONFIG["url"] = f"http://127.0.0.1:{args.remote}"
    threading.Thread(target=input_thread, daemon=True).start()
    print(
        f"\n{get_ts()} {C_CYAN}oproxy Start (L:{args.local} -> R:{args.remote}){C_RESET}"
    )
    show_help()
    uvicorn.run(app, host="127.0.0.1", port=args.local, log_level="error")