qwen3-coder:8bによりlll追加の筈
This commit is contained in:
parent
7ebb49af8b
commit
f2c2039d45
1 changed files with 130 additions and 43 deletions
173
oproxy.py
173
oproxy.py
|
|
@ -1,47 +1,71 @@
|
||||||
# バージョン情報: Python 3.12+ / FastAPI 0.115.0 / uvicorn 0.30.0 / httpx 0.28.0
|
# バージョン情報: Python 3.12+ / FastAPI 0.115.0 / uvicorn 0.30.0 / httpx 0.28.0
|
||||||
import httpx, asyncio, json, sys, threading, os, argparse, unicodedata
|
import argparse
|
||||||
from datetime import datetime
|
import asyncio
|
||||||
|
import json
|
||||||
|
import os
|
||||||
|
import sys
|
||||||
|
import threading
|
||||||
|
import unicodedata
|
||||||
from contextlib import asynccontextmanager
|
from contextlib import asynccontextmanager
|
||||||
|
from datetime import datetime
|
||||||
|
|
||||||
|
import httpx
|
||||||
import uvicorn
|
import uvicorn
|
||||||
from fastapi import FastAPI, Request
|
from fastapi import FastAPI, Request
|
||||||
from starlette.responses import StreamingResponse
|
from starlette.responses import StreamingResponse
|
||||||
|
|
||||||
# ANSI SGR escape sequences used to colorize console output.
# Every colored fragment is expected to be closed with C_RESET.
C_GRAY = "\033[90m"
C_CYAN = "\033[96m"
C_GREEN = "\033[92m"
C_YELLOW = "\033[93m"
C_RED = "\033[91m"
C_WHITE = "\033[97m"
C_RESET = "\033[0m"
|
||||||
|
|
||||||
# Mutable runtime state shared by the request handler, the console input
# thread and the lifespan hook.
CONFIG = {
    # Upstream port on 127.0.0.1; replaced from the command line at startup
    # and by the numeric console command.
    "remote_port": 11430,
    # Base URL of the upstream server; always rewritten together with
    # "remote_port".
    "url": "http://127.0.0.1:11430",
    # Timeout(None) disables httpx timeouts for upstream requests.
    "timeout": httpx.Timeout(None),
    # Running asyncio event loop; filled in by lifespan() so the console
    # thread can schedule coroutines onto it.
    "loop": None,
    # List of {"name", "size", "tool"} dicts written by update_model_cache().
    "models_cache": [],
}
|
||||||
|
|
||||||
|
|
||||||
def get_ts() -> str:
    """Return the gray log prefix ``[HH:MM:SS.mmm] [:<remote_port>]``.

    The time is the local wall clock at the moment of the call; the port is
    read from ``CONFIG["remote_port"]``.
    """
    # %f is six-digit microseconds; dropping the last three digits leaves
    # milliseconds.
    ts = datetime.now().strftime("%H:%M:%S.%f")[:-3]
    return f"{C_GRAY}[{ts}] [:{CONFIG['remote_port']}]{C_RESET}"
|
||||||
|
|
||||||
|
|
||||||
def get_width(text: str) -> int:
    """Return the number of terminal columns *text* is assumed to occupy.

    Characters whose East Asian Width class is Fullwidth ("F"), Wide ("W") or
    Ambiguous ("A") count as two columns; every other character counts as
    one. An empty string has width 0.
    """
    # Tuple membership instead of a substring test against "FWA", so that a
    # multi-letter class such as "Na" can never match by accident.
    double_width = ("F", "W", "A")
    return sum(
        2 if unicodedata.east_asian_width(ch) in double_width else 1
        for ch in text
    )
|
||||||
|
|
||||||
|
|
||||||
def pad_text(text: str, target_width: int) -> str:
    """Right-pad *text* with spaces up to *target_width* display columns.

    The width is measured with ``get_width``. Text that is already at least
    *target_width* columns wide is returned unchanged (never truncated).
    """
    missing = max(0, target_width - get_width(text))
    return text + " " * missing
|
||||||
|
|
||||||
|
|
||||||
def pulse(char: str, color: str = C_RESET) -> None:
    """Write *char* to stdout in *color*, without a newline, and flush.

    Used as a progress indicator, so the output is flushed on every call to
    appear immediately.
    """
    print(f"{color}{char}{C_RESET}", end="", flush=True)
|
||||||
|
|
||||||
|
|
||||||
@asynccontextmanager
async def lifespan(app: FastAPI):
    """Application lifespan hook.

    On startup, records the running event loop in ``CONFIG["loop"]`` and
    starts ``update_model_cache()`` in the background. On shutdown, cancels
    that task if it is still running.
    """
    CONFIG["loop"] = asyncio.get_running_loop()
    # Keep a reference to the task: the event loop itself only holds weak
    # references, so an unreferenced task may be garbage-collected before it
    # finishes.
    warmup = asyncio.create_task(update_model_cache())
    try:
        yield
    finally:
        if not warmup.done():
            warmup.cancel()
|
||||||
|
|
||||||
|
|
||||||
# ASGI application object served by uvicorn; startup/shutdown work is
# delegated to lifespan().
app = FastAPI(lifespan=lifespan)
|
||||||
|
|
||||||
|
|
||||||
# --- ロジック:モデルリスト取得 ---
|
# --- ロジック:モデルリスト取得 ---
|
||||||
async def update_model_cache():
|
async def update_model_cache():
|
||||||
try:
|
try:
|
||||||
|
|
@ -49,58 +73,99 @@ async def update_model_cache():
|
||||||
res = await client.get(f"{CONFIG['url']}/api/tags")
|
res = await client.get(f"{CONFIG['url']}/api/tags")
|
||||||
if res.status_code == 200:
|
if res.status_code == 200:
|
||||||
new_data = []
|
new_data = []
|
||||||
for m in res.json().get('models', []):
|
for m in res.json().get("models", []):
|
||||||
# ツールサポートの簡易判定
|
# ツールサポートの簡易判定
|
||||||
has_tool = False
|
has_tool = False
|
||||||
try:
|
try:
|
||||||
s = await client.post(f"{CONFIG['url']}/api/show", json={"name": m['name']})
|
s = await client.post(
|
||||||
|
f"{CONFIG['url']}/api/show", json={"name": m["name"]}
|
||||||
|
)
|
||||||
info = s.json()
|
info = s.json()
|
||||||
details = str(info.get("template", "")) + str(info.get("details", ""))
|
details = str(info.get("template", "")) + str(
|
||||||
has_tool = any(w in details.lower() for w in ["tool", "functions"])
|
info.get("details", "")
|
||||||
except: pass
|
)
|
||||||
new_data.append({"name": m['name'], "size": m['size']/(1024**3), "tool": has_tool})
|
has_tool = any(
|
||||||
|
w in details.lower() for w in ["tool", "functions"]
|
||||||
|
)
|
||||||
|
except:
|
||||||
|
pass
|
||||||
|
new_data.append(
|
||||||
|
{
|
||||||
|
"name": m["name"],
|
||||||
|
"size": m["size"] / (1024**3),
|
||||||
|
"tool": has_tool,
|
||||||
|
}
|
||||||
|
)
|
||||||
CONFIG["models_cache"] = new_data
|
CONFIG["models_cache"] = new_data
|
||||||
except: pass
|
except:
|
||||||
|
pass
|
||||||
|
|
||||||
|
|
||||||
def show_help() -> None:
    """Print the one-line summary of the interactive console commands."""
    print(
        f"\n{get_ts()} {C_WHITE}>>> h:HELP l:LIST ll:DETAIL s:VRAM [digit]:PORT q:EXIT <<<{C_RESET}",
        flush=True,
    )
|
||||||
|
|
||||||
def _clip_name_left(name, width):
    """Drop leading characters of *name* and prefix "..." until it fits *width* columns."""
    if get_width(name) <= width:
        return name
    # `name and` guards against an endless loop when width is smaller than
    # the "..." prefix itself.
    while name and get_width("..." + name) > width:
        name = name[1:]
    return "..." + name


def display_models(full=False, short=False):
    """Print the cached model list to the console.

    Args:
        full: Print two lines per model (name, then size on its own line).
        short: Print only icon, tool tag and name for each model.
            NOTE(review): nothing in this file passes ``short=True`` and its
            intended output is not shown here; name-only output is an
            assumption - confirm. Ignored when ``full`` is true.

    With both flags false, prints one line per model with the name
    left-truncated/padded to a fixed column and the size in GiB.
    Prints a notice and returns when the cache is empty.
    """
    # Read the cache once: it is replaced wholesale by update_model_cache(),
    # so a single reference keeps the check and the loop consistent.
    models = CONFIG["models_cache"]
    # `short` selects a layout only; it must not be treated as "cache empty".
    if not models:
        print(
            f"\n{get_ts()} {C_YELLOW}Cache is empty. Ollama may be offline.{C_RESET}",
            flush=True,
        )
        return

    if full:
        mode = "Detailed"
    elif short:
        mode = "Names"
    else:
        mode = "Short"
    print(
        f"\n{get_ts()} {C_GREEN}--- Models ({mode}) ---{C_RESET}",
        flush=True,
    )

    NAME_W = 55  # display columns reserved for the name in the one-line layout
    # Models larger than this many GiB get the "❌" icon.
    # NOTE(review): presumably the usable VRAM budget - confirm.
    size_limit_gib = 16.8

    for m in models:
        icon = "❌" if m["size"] > size_limit_gib else ("✅" if m["tool"] else "⚠️")
        tag = f"{C_CYAN}[T]{C_RESET}" if m["tool"] else f"{C_GRAY}[-]{C_RESET}"
        if full:
            print(f"{get_ts()} {icon} {tag} {C_WHITE}{m['name']}{C_RESET}")
            print(f"{get_ts()} {C_GRAY}└─ {m['size']:>6.1f} GiB{C_RESET}")
        elif short:
            print(f"{get_ts()} {icon} {tag} {C_WHITE}{m['name']}{C_RESET}")
        else:
            n = _clip_name_left(m["name"], NAME_W)
            print(
                f"{get_ts()} {icon} {tag} {C_WHITE}{pad_text(n, NAME_W)}{C_RESET} {C_CYAN}{m['size']:>6.1f} GiB{C_RESET}"
            )
    print(f"{get_ts()} {C_GREEN}--- End ---{C_RESET}\n", flush=True)
|
||||||
|
|
||||||
|
|
||||||
# --- Proxy 本体 ---
|
# --- Proxy 本体 ---
|
||||||
@app.api_route("/{path:path}", methods=["GET", "POST", "PUT", "DELETE"])
|
@app.api_route("/{path:path}", methods=["GET", "POST", "PUT", "DELETE"])
|
||||||
async def sticky_proxy(path: str, request: Request):
|
async def sticky_proxy(path: str, request: Request):
|
||||||
print(f"\n{get_ts()} {C_WHITE}/{path}{C_RESET} ", end="", flush=True)
|
print(f"\n{get_ts()} {C_WHITE}/{path}{C_RESET} ", end="", flush=True)
|
||||||
body = await request.body()
|
body = await request.body()
|
||||||
# リクエストの長さに応じてインジケータを出す
|
# リクエストの長さに応じてインジケータを出す
|
||||||
for _ in range(min(len(body)//256 + 1, 5)): pulse("^", C_CYAN)
|
for _ in range(min(len(body) // 256 + 1, 5)):
|
||||||
|
pulse("^", C_CYAN)
|
||||||
pulse("|", C_YELLOW)
|
pulse("|", C_YELLOW)
|
||||||
|
|
||||||
async def stream_response():
|
async def stream_response():
|
||||||
# 接続エラーを回避するためにリクエストごとにClientを生成
|
# 接続エラーを回避するためにリクエストごとにClientを生成
|
||||||
async with httpx.AsyncClient(timeout=CONFIG["timeout"], base_url="http://127.0.0.1:11432") as client:
|
async with httpx.AsyncClient(
|
||||||
|
timeout=CONFIG["timeout"], base_url="http://127.0.0.1:11432"
|
||||||
|
) as client:
|
||||||
try:
|
try:
|
||||||
# 宛先を強制的に 127.0.0.1 に固定したURLで構築
|
# 宛先を強制的に 127.0.0.1 に固定したURLで構築
|
||||||
target_url = f"{CONFIG['url']}/{path}"
|
target_url = f"{CONFIG['url']}/{path}"
|
||||||
async with client.stream(request.method, target_url, content=body, headers={k:v for k,v in request.headers.items() if k.lower() not in ["host","content-length"]}) as response:
|
async with client.stream(
|
||||||
|
request.method,
|
||||||
|
target_url,
|
||||||
|
content=body,
|
||||||
|
headers={
|
||||||
|
k: v
|
||||||
|
for k, v in request.headers.items()
|
||||||
|
if k.lower() not in ["host", "content-length"]
|
||||||
|
},
|
||||||
|
) as response:
|
||||||
pulse("v", C_GREEN)
|
pulse("v", C_GREEN)
|
||||||
async for chunk in response.aiter_bytes():
|
async for chunk in response.aiter_bytes():
|
||||||
pulse("v", C_GREEN)
|
pulse("v", C_GREEN)
|
||||||
|
|
@ -113,42 +178,64 @@ async def sticky_proxy(path: str, request: Request):
|
||||||
|
|
||||||
return StreamingResponse(stream_response())
|
return StreamingResponse(stream_response())
|
||||||
|
|
||||||
|
|
||||||
async def _print_vram_usage():
    """Query ``/api/ps`` on the current target and print per-model VRAM use."""
    try:
        async with httpx.AsyncClient() as client:
            res = await client.get(f"{CONFIG['url']}/api/ps")
        if res.status_code != 200:
            print(
                f"\n{get_ts()} {C_YELLOW}VRAM query returned HTTP {res.status_code}{C_RESET}",
                flush=True,
            )
            return
        print(f"\n{get_ts()} {C_CYAN}--- VRAM ---{C_RESET}")
        for m in res.json().get("models", []):
            print(
                f"{get_ts()} {m['name']:<25} {m['size_vram'] / (1024**3):.1f}G"
            )
    except (httpx.HTTPError, ValueError, KeyError, TypeError) as exc:
        # This coroutine is scheduled from another thread and nobody awaits
        # its future, so failures must be reported here or they are lost.
        print(
            f"\n{get_ts()} {C_RED}VRAM query failed: {exc!r}{C_RESET}",
            flush=True,
        )


def _schedule_on_loop(coro_factory):
    """Run ``coro_factory()`` on the server's event loop, if one is recorded."""
    loop = CONFIG["loop"]
    if loop is not None:
        # The coroutine is only created when it can actually be scheduled,
        # which avoids "coroutine was never awaited" warnings.
        asyncio.run_coroutine_threadsafe(coro_factory(), loop)


def _handle_command(cmd):
    """Execute one normalized (stripped, lower-cased) console command."""
    if cmd == "q":
        # Hard exit: this runs in a daemon thread, so a normal exit would not
        # stop the server running in the main thread.
        os._exit(0)
    elif cmd == "h":
        show_help()
    elif cmd == "l":
        display_models(False)
    elif cmd == "ll":
        display_models(full=True)
    elif cmd == "s":
        _schedule_on_loop(_print_vram_usage)
    elif cmd.isdecimal():
        # isdecimal() only accepts characters int() can parse; isdigit()
        # also accepts e.g. superscript digits, which make int() raise.
        p = int(cmd)
        if not 1 <= p <= 65535:
            print(
                f"\n{get_ts()} {C_RED}Invalid port: {p}{C_RESET}",
                flush=True,
            )
            return
        CONFIG["remote_port"], CONFIG["url"] = p, f"http://127.0.0.1:{p}"
        print(
            f"\n{get_ts()} {C_YELLOW}Switch Target -> {CONFIG['url']}{C_RESET}"
        )
        _schedule_on_loop(update_model_cache)
    # Anything else is ignored silently.


def input_thread():
    """Read console commands from stdin until EOF and execute them.

    Commands: ``q`` exit the process, ``h`` help, ``l`` model list,
    ``ll`` detailed model list, ``s`` VRAM usage, ``<number>`` switch the
    upstream port. A failing command is reported and the loop continues;
    the loop ends only when stdin reaches EOF or can no longer be read.
    """
    while True:
        try:
            line = sys.stdin.readline()
        except (OSError, ValueError):
            # stdin closed or unreadable: no further input is possible.
            break
        if not line:
            break  # EOF
        try:
            _handle_command(line.strip().lower())
        except Exception as exc:
            # One bad command must not silently end the console thread.
            print(
                f"\n{get_ts()} {C_RED}Command failed: {exc!r}{C_RESET}",
                flush=True,
            )
|
||||||
|
|
||||||
|
|
||||||
if __name__ == "__main__":
    # Script entry point: parse ports, start the console thread, then serve.
    parser = argparse.ArgumentParser(
        description="Local pass-through proxy with an interactive console."
    )
    parser.add_argument(
        "-r",
        "--remote",
        type=int,
        default=11432,  # default upstream port is 11432
        help="upstream port on 127.0.0.1 (default: %(default)s)",
    )
    parser.add_argument(
        "-l",
        "--local",
        type=int,
        default=11434,
        help="port this proxy listens on (default: %(default)s)",
    )
    args = parser.parse_args()

    # The command-line value replaces the defaults baked into CONFIG.
    CONFIG["remote_port"] = args.remote
    CONFIG["url"] = f"http://127.0.0.1:{args.remote}"

    # Daemon thread so a blocked stdin read never keeps the process alive.
    threading.Thread(target=input_thread, daemon=True).start()
    print(
        f"\n{get_ts()} {C_CYAN}oproxy Start (L:{args.local} -> R:{args.remote}){C_RESET}"
    )
    show_help()
    uvicorn.run(app, host="127.0.0.1", port=args.local, log_level="error")
|
||||||
|
|
|
||||||
Loading…
Reference in a new issue