qwen3-coder:8bによりlll追加の筈

This commit is contained in:
user 2026-02-09 10:45:53 +09:00
parent 7ebb49af8b
commit f2c2039d45

169
oproxy.py
View file

@ -1,47 +1,71 @@
# バージョン情報: Python 3.12+ / FastAPI 0.115.0 / uvicorn 0.30.0 / httpx 0.28.0 # バージョン情報: Python 3.12+ / FastAPI 0.115.0 / uvicorn 0.30.0 / httpx 0.28.0
import httpx, asyncio, json, sys, threading, os, argparse, unicodedata import argparse
from datetime import datetime import asyncio
import json
import os
import sys
import threading
import unicodedata
from contextlib import asynccontextmanager from contextlib import asynccontextmanager
from datetime import datetime
import httpx
import uvicorn import uvicorn
from fastapi import FastAPI, Request from fastapi import FastAPI, Request
from starlette.responses import StreamingResponse from starlette.responses import StreamingResponse
# ANSI escape sequences used to colorize console output.
C_GRAY = "\033[90m"
C_CYAN = "\033[96m"
C_GREEN = "\033[92m"
C_YELLOW = "\033[93m"
C_RED = "\033[91m"
C_WHITE = "\033[97m"
C_RESET = "\033[0m"  # restores the terminal's default attributes
# Mutable runtime state shared by the request handler and the console thread.
CONFIG = dict(
    remote_port=11430,  # port shown in the log prefix
    url="http://127.0.0.1:11430",  # base URL of the upstream server
    timeout=httpx.Timeout(None),  # no timeout on upstream requests
    loop=None,  # filled in with the running event loop at startup
    models_cache=[],  # list of {"name", "size", "tool"} dicts
)
def get_ts():
    """Return the gray log prefix: current time plus the target port.

    The time is rendered as ``HH:MM:SS.mmm`` (microseconds cut down to
    milliseconds) and the port is read from ``CONFIG["remote_port"]``.
    """
    now = datetime.now()
    stamp = now.strftime("%H:%M:%S.%f")[:-3]
    port = CONFIG["remote_port"]
    return f"{C_GRAY}[{stamp}] [:{port}]{C_RESET}"
def get_width(text: str) -> int:
    """Return the number of terminal columns *text* is expected to occupy.

    Characters whose East Asian Width class is Fullwidth (``F``), Wide
    (``W``) or Ambiguous (``A``) count as two columns; every other
    character counts as one.  An empty string has width 0.
    """
    double_width = ("F", "W", "A")
    return sum(
        2 if unicodedata.east_asian_width(ch) in double_width else 1
        for ch in text
    )
def pad_text(text, target_width):
    """Right-pad *text* with spaces up to *target_width* display columns.

    The width is measured with ``get_width``; text that is already at or
    beyond the target width is returned unchanged.
    """
    missing = target_width - get_width(text)
    if missing <= 0:
        return text
    return text + " " * missing
def pulse(char, color=C_RESET):
    """Print *char* in *color* with no trailing newline, flushing at once.

    The color is reset right after the character so later output is not
    affected.
    """
    marker = f"{color}{char}{C_RESET}"
    print(marker, end="", flush=True)
@asynccontextmanager
async def lifespan(app: FastAPI):
    """Application lifespan hook: record the event loop and warm the cache.

    On startup the running loop is stored in ``CONFIG["loop"]`` so the
    console thread can schedule coroutines on it, and a background task
    is started to fill the model cache.  On shutdown that task is
    cancelled if it has not finished yet.
    """
    CONFIG["loop"] = asyncio.get_running_loop()
    # The event loop keeps only weak references to tasks, so the result of
    # create_task() must be held somewhere or the task may be collected
    # before it completes.
    refresh_task = asyncio.create_task(update_model_cache())
    try:
        yield
    finally:
        if not refresh_task.done():
            refresh_task.cancel()


app = FastAPI(lifespan=lifespan)
# --- ロジック:モデルリスト取得 --- # --- ロジック:モデルリスト取得 ---
async def update_model_cache(): async def update_model_cache():
try: try:
@ -49,58 +73,99 @@ async def update_model_cache():
res = await client.get(f"{CONFIG['url']}/api/tags") res = await client.get(f"{CONFIG['url']}/api/tags")
if res.status_code == 200: if res.status_code == 200:
new_data = [] new_data = []
for m in res.json().get('models', []): for m in res.json().get("models", []):
# ツールサポートの簡易判定 # ツールサポートの簡易判定
has_tool = False has_tool = False
try: try:
s = await client.post(f"{CONFIG['url']}/api/show", json={"name": m['name']}) s = await client.post(
f"{CONFIG['url']}/api/show", json={"name": m["name"]}
)
info = s.json() info = s.json()
details = str(info.get("template", "")) + str(info.get("details", "")) details = str(info.get("template", "")) + str(
has_tool = any(w in details.lower() for w in ["tool", "functions"]) info.get("details", "")
except: pass )
new_data.append({"name": m['name'], "size": m['size']/(1024**3), "tool": has_tool}) has_tool = any(
w in details.lower() for w in ["tool", "functions"]
)
except:
pass
new_data.append(
{
"name": m["name"],
"size": m["size"] / (1024**3),
"tool": has_tool,
}
)
CONFIG["models_cache"] = new_data CONFIG["models_cache"] = new_data
except: pass except:
pass
def show_help():
    """Print the one-line summary of the console commands."""
    banner = ">>> h:HELP l:LIST ll:DETAIL s:VRAM [digit]:PORT q:EXIT <<<"
    print(f"\n{get_ts()} {C_WHITE}{banner}{C_RESET}", flush=True)
def display_models(full=False, short=False):
    """Print the cached model list to the console.

    Args:
        full: Two lines per model: the untruncated name, then its size.
        short: One line per model with the name only (no padding, no
            size).  Ignored when ``full`` is also true.

    With neither flag set, each model is printed on one line with the
    name truncated/padded to a fixed column width followed by its size.
    When the cache is empty a warning is printed instead.
    """
    models = CONFIG["models_cache"]
    # Only an empty cache warrants the warning.  The guard used to read
    # `not cache or short`, so short=True always reported an empty cache
    # and returned without listing anything.
    if not models:
        print(
            f"\n{get_ts()} {C_YELLOW}Cache is empty. Ollama may be offline.{C_RESET}",
            flush=True,
        )
        return

    if full:
        mode = "Detailed"
    elif short:
        mode = "Names"
    else:
        mode = "Short"
    print(f"\n{get_ts()} {C_GREEN}--- Models ({mode}) ---{C_RESET}", flush=True)

    name_width = 55
    for m in models:
        # NOTE(review): the first two icon literals are empty strings in the
        # text this was reconstructed from; they may be glyphs that were lost
        # in extraction - confirm against the real file.  The meaning of the
        # 16.8 threshold (applied to the size printed as GiB) is likewise not
        # visible here.
        icon = "" if m["size"] > 16.8 else ("" if m["tool"] else "⚠️")
        tag = f"{C_CYAN}[T]{C_RESET}" if m["tool"] else f"{C_GRAY}[-]{C_RESET}"
        if full:
            print(f"{get_ts()} {icon} {tag} {C_WHITE}{m['name']}{C_RESET}")
            print(f"{get_ts()}       {C_GRAY}└─ {m['size']:>6.1f} GiB{C_RESET}")
        elif short:
            print(f"{get_ts()} {icon} {tag} {C_WHITE}{m['name']}{C_RESET}")
        else:
            name = m["name"]
            if get_width(name) > name_width:
                # Drop leading characters until "..." + name fits the column.
                while get_width("..." + name) > name_width:
                    name = name[1:]
                name = "..." + name
            print(
                f"{get_ts()} {icon} {tag} {C_WHITE}{pad_text(name, name_width)}{C_RESET} "
                f"{C_CYAN}{m['size']:>6.1f} GiB{C_RESET}"
            )
    print(f"{get_ts()} {C_GREEN}--- End ---{C_RESET}\n", flush=True)
# --- Proxy 本体 --- # --- Proxy 本体 ---
@app.api_route("/{path:path}", methods=["GET", "POST", "PUT", "DELETE"]) @app.api_route("/{path:path}", methods=["GET", "POST", "PUT", "DELETE"])
async def sticky_proxy(path: str, request: Request): async def sticky_proxy(path: str, request: Request):
print(f"\n{get_ts()} {C_WHITE}/{path}{C_RESET} ", end="", flush=True) print(f"\n{get_ts()} {C_WHITE}/{path}{C_RESET} ", end="", flush=True)
body = await request.body() body = await request.body()
# リクエストの長さに応じてインジケータを出す # リクエストの長さに応じてインジケータを出す
for _ in range(min(len(body)//256 + 1, 5)): pulse("^", C_CYAN) for _ in range(min(len(body) // 256 + 1, 5)):
pulse("^", C_CYAN)
pulse("|", C_YELLOW) pulse("|", C_YELLOW)
async def stream_response(): async def stream_response():
# 接続エラーを回避するためにリクエストごとにClientを生成 # 接続エラーを回避するためにリクエストごとにClientを生成
async with httpx.AsyncClient(timeout=CONFIG["timeout"], base_url="http://127.0.0.1:11432") as client: async with httpx.AsyncClient(
timeout=CONFIG["timeout"], base_url="http://127.0.0.1:11432"
) as client:
try: try:
# 宛先を強制的に 127.0.0.1 に固定したURLで構築 # 宛先を強制的に 127.0.0.1 に固定したURLで構築
target_url = f"{CONFIG['url']}/{path}" target_url = f"{CONFIG['url']}/{path}"
async with client.stream(request.method, target_url, content=body, headers={k:v for k,v in request.headers.items() if k.lower() not in ["host","content-length"]}) as response: async with client.stream(
request.method,
target_url,
content=body,
headers={
k: v
for k, v in request.headers.items()
if k.lower() not in ["host", "content-length"]
},
) as response:
pulse("v", C_GREEN) pulse("v", C_GREEN)
async for chunk in response.aiter_bytes(): async for chunk in response.aiter_bytes():
pulse("v", C_GREEN) pulse("v", C_GREEN)
@ -113,35 +178,55 @@ async def sticky_proxy(path: str, request: Request):
return StreamingResponse(stream_response()) return StreamingResponse(stream_response())
async def _print_vram_usage():
    """Query ``/api/ps`` on the current target and print per-model VRAM use."""
    try:
        async with httpx.AsyncClient() as client:
            resp = await client.get(f"{CONFIG['url']}/api/ps")
            if resp.status_code != 200:
                return
            print(f"\n{get_ts()} {C_CYAN}--- VRAM ---{C_RESET}")
            for m in resp.json().get("models", []):
                print(
                    f"{get_ts()} {m['name']:<25} {m['size_vram'] / (1024**3):.1f}G"
                )
    except (httpx.HTTPError, ValueError, KeyError) as exc:
        # This coroutine's future is never awaited, so an escaping exception
        # would vanish; report it here instead.
        print(f"\n{get_ts()} {C_RED}VRAM query failed: {exc!r}{C_RESET}", flush=True)


def _run_on_server_loop(coro_fn):
    """Schedule ``coro_fn()`` on the loop stored in CONFIG, if there is one."""
    loop = CONFIG["loop"]
    if loop:
        asyncio.run_coroutine_threadsafe(coro_fn(), loop)


def input_thread():
    """Read console commands from stdin until EOF.

    Commands (case-insensitive, surrounding whitespace ignored):
        q        exit the process immediately
        h        show the help line
        l / ll   list cached models (short / detailed)
        s        print VRAM usage reported by the target's ``/api/ps``
        <port>   switch the upstream target to 127.0.0.1:<port> and
                 refresh the model cache

    Unknown input is ignored.  A failure while handling one command is
    reported and the loop keeps running; only EOF or an unreadable stdin
    ends the thread.
    """
    while True:
        try:
            line = sys.stdin.readline()
        except (OSError, ValueError):
            # stdin was closed or is otherwise unreadable.
            break
        if not line:
            break  # EOF

        cmd = line.strip().lower()
        try:
            if cmd == "q":
                os._exit(0)
            elif cmd == "h":
                show_help()
            elif cmd == "l":
                display_models(False)
            elif cmd == "ll":
                display_models(full=True)
            elif cmd == "s":
                _run_on_server_loop(_print_vram_usage)
            elif cmd.isdecimal():
                # isdecimal(), unlike isdigit(), only accepts characters that
                # int() can actually parse (isdigit() is true for e.g. "²").
                port = int(cmd)
                if not 1 <= port <= 65535:
                    print(
                        f"\n{get_ts()} {C_RED}Invalid port: {cmd}{C_RESET}",
                        flush=True,
                    )
                    continue
                CONFIG["remote_port"] = port
                CONFIG["url"] = f"http://127.0.0.1:{port}"
                print(
                    f"\n{get_ts()} {C_YELLOW}Switch Target -> {CONFIG['url']}{C_RESET}"
                )
                _run_on_server_loop(update_model_cache)
        except Exception as exc:
            # Thread-level boundary: report the failure rather than letting
            # one bad command silently end the console.
            print(
                f"\n{get_ts()} {C_RED}Command failed: {exc!r}{C_RESET}",
                flush=True,
            )
if __name__ == "__main__": if __name__ == "__main__":
parser = argparse.ArgumentParser() parser = argparse.ArgumentParser()
parser.add_argument("-r", "--remote", type=int, default=11432) # デフォルトを11432に parser.add_argument(
"-r", "--remote", type=int, default=11432
) # デフォルトを11432に
parser.add_argument("-l", "--local", type=int, default=11434) parser.add_argument("-l", "--local", type=int, default=11434)
args = parser.parse_args() args = parser.parse_args()
@ -149,6 +234,8 @@ if __name__ == "__main__":
CONFIG["url"] = f"http://127.0.0.1:{args.remote}" CONFIG["url"] = f"http://127.0.0.1:{args.remote}"
threading.Thread(target=input_thread, daemon=True).start() threading.Thread(target=input_thread, daemon=True).start()
print(f"\n{get_ts()} {C_CYAN}oproxy Start (L:{args.local} -> R:{args.remote}){C_RESET}") print(
f"\n{get_ts()} {C_CYAN}oproxy Start (L:{args.local} -> R:{args.remote}){C_RESET}"
)
show_help() show_help()
uvicorn.run(app, host="127.0.0.1", port=args.local, log_level="error") uvicorn.run(app, host="127.0.0.1", port=args.local, log_level="error")