503 lines
18 KiB
Python
503 lines
18 KiB
Python
from fastapi.staticfiles import StaticFiles
|
||
from fastapi.responses import FileResponse, Response
|
||
from typing import List, Optional, Dict
|
||
from fastapi import FastAPI, UploadFile, File, HTTPException, Form, Body
|
||
from fastapi.responses import JSONResponse
|
||
from pydantic import BaseModel
|
||
import uvicorn
|
||
from rag_engine_gemini import RAGEngineGemini
|
||
import logging
|
||
from prometheus_fastapi_instrumentator import Instrumentator
|
||
import asyncio
|
||
import hashlib
|
||
from datetime import datetime
|
||
from fastapi.middleware.cors import CORSMiddleware
|
||
import os
|
||
import json
|
||
import base64
|
||
from urllib.parse import quote
|
||
|
||
logging.basicConfig(level=logging.INFO)
|
||
logger = logging.getLogger(__name__)
|
||
|
||
os.makedirs("/opt/rag-gemini/app/reports", exist_ok=True)
|
||
SHIPPING_TYPES_FILE = os.path.join(os.path.dirname(__file__), "shipping_types.json")
|
||
PROCESSED_SHIPPING_TYPES_FILE = os.path.join(
|
||
os.path.dirname(__file__), "shipping_types_processed.json"
|
||
)
|
||
|
||
|
||
class ShippingTypeBase(BaseModel):
|
||
name: str
|
||
criteria: str = ""
|
||
keywords: List[str] = []
|
||
employee_email: str = ""
|
||
confirmation_template: str = ""
|
||
info_request_template: str = ""
|
||
|
||
|
||
class ShippingTypeCreate(ShippingTypeBase):
|
||
pass
|
||
|
||
|
||
class ShippingTypeUpdate(ShippingTypeBase):
|
||
pass
|
||
|
||
|
||
class ShippingType(ShippingTypeBase):
|
||
id: int
|
||
|
||
|
||
def load_shipping_types():
|
||
"""Загружает типы перевозок с нормализацией ключей"""
|
||
if not os.path.exists(SHIPPING_TYPES_FILE):
|
||
return []
|
||
with open(SHIPPING_TYPES_FILE, "r", encoding="utf-8") as f:
|
||
types = json.load(f)
|
||
|
||
# Нормализуем ключи (убираем пробелы) и keywords
|
||
normalized_types = []
|
||
for t in types:
|
||
normalized = {}
|
||
for key, value in t.items():
|
||
clean_key = key.strip()
|
||
if clean_key == "keywords" and isinstance(value, list):
|
||
# Нормализуем keywords - каждый элемент отдельно, убираем пробелы и кавычки
|
||
normalized[clean_key] = [kw.strip().strip('"').strip("'") for kw in value if kw.strip()]
|
||
elif isinstance(value, str):
|
||
normalized[clean_key] = value.strip()
|
||
else:
|
||
normalized[clean_key] = value
|
||
normalized_types.append(normalized)
|
||
|
||
return normalized_types
|
||
|
||
|
||
def save_shipping_types(types):
|
||
"""Сохраняет типы перевозок в файл"""
|
||
with open(SHIPPING_TYPES_FILE, "w", encoding="utf-8") as f:
|
||
json.dump(types, f, ensure_ascii=False, indent=2)
|
||
|
||
|
||
def load_processed_shipping_criteria() -> Dict[str, str]:
|
||
"""Loads AI-processed criteria per shipping type name."""
|
||
if not os.path.exists(PROCESSED_SHIPPING_TYPES_FILE):
|
||
return {}
|
||
try:
|
||
with open(PROCESSED_SHIPPING_TYPES_FILE, "r", encoding="utf-8") as f:
|
||
data = json.load(f)
|
||
if isinstance(data, dict):
|
||
return {str(k): str(v) for k, v in data.items() if v is not None}
|
||
return {}
|
||
except Exception as e:
|
||
logger.warning(f"Failed to load processed shipping criteria: {e}")
|
||
return {}
|
||
|
||
|
||
def save_processed_shipping_criteria(mapping: Dict[str, str]) -> None:
|
||
"""Saves AI-processed criteria per shipping type name."""
|
||
with open(PROCESSED_SHIPPING_TYPES_FILE, "w", encoding="utf-8") as f:
|
||
json.dump(mapping, f, ensure_ascii=False, indent=2)
|
||
|
||
|
||
def upsert_processed_shipping_criteria(type_name: str, processed_criteria: str) -> None:
|
||
if not isinstance(type_name, str) or not type_name.strip():
|
||
return
|
||
processed_criteria = processed_criteria if isinstance(processed_criteria, str) else ""
|
||
mapping = load_processed_shipping_criteria()
|
||
mapping[type_name] = processed_criteria
|
||
save_processed_shipping_criteria(mapping)
|
||
|
||
|
||
app = FastAPI(title="SEPTEM Cargo RAG System")
|
||
app.mount("/addin", StaticFiles(directory=os.path.join(os.getcwd(), "addin"), html=True), name="addin")
|
||
|
||
OLD_HOST = "nec.septem.pro"
|
||
NEW_HOST = "nec.clients.septem.pro"
|
||
|
||
|
||
@app.middleware("http")
|
||
async def redirect_legacy_host(request, call_next):
|
||
host = request.headers.get("host", "").split(":")[0].lower()
|
||
if host == OLD_HOST:
|
||
target_url = f"https://{NEW_HOST}{request.url.path}"
|
||
if request.url.query:
|
||
target_url = f"{target_url}?{request.url.query}"
|
||
return JSONResponse(
|
||
status_code=308,
|
||
content={"detail": "Permanent Redirect"},
|
||
headers={"Location": target_url},
|
||
)
|
||
return await call_next(request)
|
||
|
||
|
||
@app.get("/addin/taskpane.html")
|
||
async def get_taskpane():
|
||
if os.path.exists("addin/taskpane.html"):
|
||
return FileResponse("addin/taskpane.html")
|
||
elif os.path.exists("frontend/taskpane.html"):
|
||
return FileResponse("frontend/taskpane.html")
|
||
else:
|
||
return {"error": "taskpane.html not found"}
|
||
|
||
|
||
@app.get("/addin/commands.html")
|
||
async def get_commands():
|
||
if os.path.exists("addin/commands.html"):
|
||
return FileResponse("addin/commands.html")
|
||
elif os.path.exists("frontend/commands.html"):
|
||
return FileResponse("frontend/commands.html")
|
||
else:
|
||
return {"error": "commands.html not found"}
|
||
|
||
|
||
rag = RAGEngineGemini()
|
||
Instrumentator().instrument(app).expose(app)
|
||
|
||
app.add_middleware(
|
||
CORSMiddleware,
|
||
allow_origins=[
|
||
"https://nec.clients.septem.pro",
|
||
"https://localhost:3000",
|
||
"http://localhost:8501"
|
||
],
|
||
allow_credentials=True,
|
||
allow_methods=["*"],
|
||
allow_headers=["*"],
|
||
expose_headers=["Content-Disposition", "Content-Length"]
|
||
)
|
||
|
||
|
||
class EmailAttachment(BaseModel):
|
||
filename: str
|
||
size: int
|
||
content: Optional[str] = None
|
||
|
||
|
||
class OutlookEmail(BaseModel):
|
||
id: str
|
||
subject: str
|
||
sender: str
|
||
senderName: Optional[str] = None
|
||
body: str
|
||
body_html: Optional[str] = None
|
||
receivedTime: Optional[str] = None
|
||
to: Optional[str] = None
|
||
cc: Optional[str] = None
|
||
attachments: List[EmailAttachment] = []
|
||
|
||
class OutlookEmailsRequest(BaseModel):
|
||
emails: List[OutlookEmail]
|
||
session_id: Optional[str] = None
|
||
|
||
|
||
class CargoQueryRequest(BaseModel):
|
||
query: str
|
||
session_id: Optional[str] = None
|
||
top_k: int = 10
|
||
|
||
|
||
class CargoQueryResponse(BaseModel):
|
||
answer: str
|
||
structured_data: dict
|
||
sources: list
|
||
total_emails_analyzed: int
|
||
|
||
|
||
class AnalyzeCargoRequest(BaseModel):
|
||
email_ids: List[str]
|
||
|
||
|
||
class CargoLearningRequest(BaseModel):
|
||
"""Сохранение примера для обучения: переписка + структурированный ответ (как в отчёте)."""
|
||
structured_data: dict
|
||
session_id: Optional[str] = None
|
||
context_preview: Optional[str] = None
|
||
notes: Optional[str] = None
|
||
|
||
|
||
@app.post("/record-cargo-learning")
|
||
async def record_cargo_learning(req: CargoLearningRequest):
|
||
"""
|
||
Записывает пару «контекст писем → JSON отчёта» в локальное хранилище few-shot.
|
||
Если передан session_id, текст писем подставляется из текущей сессии сервера.
|
||
"""
|
||
try:
|
||
ok = rag.record_cargo_learning(
|
||
structured_data=req.structured_data,
|
||
session_id=req.session_id,
|
||
context_preview=req.context_preview,
|
||
notes=req.notes,
|
||
)
|
||
return {"status": "ok" if ok else "skipped", "stored": ok}
|
||
except Exception as e:
|
||
logger.error(f"record-cargo-learning error: {e}")
|
||
raise HTTPException(status_code=500, detail=str(e))
|
||
|
||
|
||
@app.post("/process-outlook-emails")
|
||
async def process_outlook_emails(request: OutlookEmailsRequest):
|
||
try:
|
||
session_id = await rag.process_outlook_emails(
|
||
[email.model_dump() for email in request.emails],
|
||
session_id=request.session_id
|
||
)
|
||
return {
|
||
"status": "success",
|
||
"session_id": session_id,
|
||
"emails_processed": len(request.emails)
|
||
}
|
||
except Exception as e:
|
||
logger.error(f"Process Outlook emails error: {e}")
|
||
raise HTTPException(status_code=500, detail=str(e))
|
||
|
||
|
||
@app.post("/query-cargo", response_model=CargoQueryResponse)
|
||
async def query_cargo(request: CargoQueryRequest):
|
||
try:
|
||
result = await rag.query_cargo_info(
|
||
request.query,
|
||
request.session_id,
|
||
request.top_k
|
||
)
|
||
return result
|
||
except Exception as e:
|
||
logger.error(f"Cargo query error: {e}")
|
||
raise HTTPException(status_code=500, detail=str(e))
|
||
|
||
|
||
@app.post("/generate-cargo-report")
|
||
async def generate_cargo_report(session_id: str = Body(..., embed=True)):
|
||
try:
|
||
result = await rag.generate_cargo_report(session_id)
|
||
return result
|
||
except Exception as e:
|
||
logger.error(f"Generate report error: {e}")
|
||
raise HTTPException(status_code=500, detail=str(e))
|
||
|
||
|
||
@app.get("/email-sessions/{session_id}")
|
||
async def get_session_info(session_id: str):
|
||
return {
|
||
"session_id": session_id,
|
||
"created_at": datetime.now().isoformat(),
|
||
"emails_count": 10
|
||
}
|
||
|
||
|
||
@app.post("/upload", include_in_schema=False)
|
||
async def upload_document(file: UploadFile = File(...)):
|
||
try:
|
||
content = await file.read()
|
||
doc_id = hashlib.md5(file.filename.encode()).hexdigest()
|
||
asyncio.create_task(rag.process_document(content, file.filename))
|
||
return {"status": "processing", "document_id": doc_id}
|
||
except Exception as e:
|
||
logger.error(f"Upload error: {e}")
|
||
raise HTTPException(status_code=500, detail=str(e))
|
||
|
||
|
||
@app.get("/health")
|
||
async def health():
|
||
return {"status": "healthy"}
|
||
|
||
|
||
# =============================================================================
|
||
# 🔥 ENDPOINT ДЛЯ ПРОСМОТРА ВЛОЖЕНИЙ В БРАУЗЕРЕ (ИСПРАВЛЕННЫЙ)
|
||
# =============================================================================
|
||
@app.get("/attachments/{session_id}/{email_index}/{attachment_index}")
|
||
async def get_attachment(session_id: str, email_index: int, attachment_index: int):
|
||
"""
|
||
Возвращает оригинальный файл вложения для просмотра в браузере.
|
||
Поддерживает PDF, DOCX, XLSX, изображения и другие форматы.
|
||
Корректно обрабатывает русские имена файлов (RFC 5987).
|
||
"""
|
||
att = rag.get_attachment(session_id, email_index, attachment_index)
|
||
if att is None:
|
||
raise HTTPException(status_code=404, detail="Attachment not found")
|
||
|
||
filename = att.get("filename", "attachment")
|
||
content_base64 = att.get("content_base64")
|
||
|
||
# 🔥 Функция для безопасного формирования заголовка Content-Disposition
|
||
def make_content_disposition(filename: str, disposition: str = "inline") -> str:
|
||
"""
|
||
Формирует Content-Disposition с поддержкой UTF-8 имён файлов (RFC 5987).
|
||
Совместимо со старыми и новыми браузерами.
|
||
"""
|
||
# ASCII-версия для старых браузеров (fallback)
|
||
ascii_filename = filename.encode('ascii', 'ignore').decode('ascii') or 'attachment'
|
||
|
||
# UTF-8 версия с URL-кодированием для современных браузеров (RFC 5987)
|
||
utf8_filename = quote(filename, safe='')
|
||
|
||
return f'{disposition}; filename="{ascii_filename}"; filename*=UTF-8\'\'{utf8_filename}'
|
||
|
||
# Если нет оригинального содержимого, возвращаем извлечённый текст
|
||
if not content_base64:
|
||
text = att.get("text", "")
|
||
return Response(
|
||
content=text,
|
||
media_type="text/plain; charset=utf-8",
|
||
headers={
|
||
"Content-Disposition": make_content_disposition(filename + ".txt"),
|
||
"Content-Length": str(len(text.encode('utf-8'))),
|
||
"Access-Control-Expose-Headers": "Content-Disposition, Content-Length"
|
||
}
|
||
)
|
||
|
||
# 🔥 Декодируем base64 и определяем MIME тип по расширению
|
||
try:
|
||
file_content = base64.b64decode(content_base64)
|
||
|
||
# Определяем MIME тип по расширению файла
|
||
ext = filename.lower().split('.')[-1] if '.' in filename else ''
|
||
mime_types = {
|
||
'pdf': 'application/pdf',
|
||
'doc': 'application/msword',
|
||
'docx': 'application/vnd.openxmlformats-officedocument.wordprocessingml.document',
|
||
'xls': 'application/vnd.ms-excel',
|
||
'xlsx': 'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet',
|
||
'txt': 'text/plain',
|
||
'csv': 'text/csv',
|
||
'png': 'image/png',
|
||
'jpg': 'image/jpeg',
|
||
'jpeg': 'image/jpeg',
|
||
'gif': 'image/gif',
|
||
'bmp': 'image/bmp',
|
||
'zip': 'application/zip',
|
||
'rar': 'application/vnd.rar',
|
||
'ppt': 'application/vnd.ms-powerpoint',
|
||
'pptx': 'application/vnd.openxmlformats-officedocument.presentationml.presentation',
|
||
'rtf': 'application/rtf',
|
||
'xml': 'application/xml',
|
||
'json': 'application/json',
|
||
}
|
||
|
||
media_type = mime_types.get(ext, 'application/octet-stream')
|
||
|
||
# 🔥 Возвращаем файл с правильным заголовком Content-Disposition
|
||
return Response(
|
||
content=file_content,
|
||
media_type=media_type,
|
||
headers={
|
||
"Content-Disposition": make_content_disposition(filename),
|
||
"Content-Length": str(len(file_content)),
|
||
"Access-Control-Expose-Headers": "Content-Disposition, Content-Length"
|
||
}
|
||
)
|
||
except Exception as e:
|
||
logger.error(f"Error serving attachment: {e}")
|
||
raise HTTPException(status_code=500, detail=f"Error processing file: {str(e)}")
|
||
|
||
|
||
@app.get("/shipping-types", response_model=List[ShippingType])
|
||
async def get_shipping_types():
|
||
return load_shipping_types()
|
||
|
||
|
||
@app.post("/shipping-types", response_model=ShippingType)
|
||
async def create_shipping_type(item: ShippingTypeCreate):
|
||
types = load_shipping_types()
|
||
new_id = max([t["id"] for t in types], default=0) + 1
|
||
new_item = item.model_dump()
|
||
new_item["id"] = new_id
|
||
types.append(new_item)
|
||
save_shipping_types(types)
|
||
|
||
# AI-обработка criteria и сохранение результата
|
||
try:
|
||
type_name = new_item.get("name", "")
|
||
criteria_text = new_item.get("criteria", "") or ""
|
||
processed = rag.process_shipping_type_criteria(criteria_text)
|
||
upsert_processed_shipping_criteria(type_name, processed)
|
||
rag.reload_shipping_types()
|
||
except Exception as e:
|
||
logger.warning(f"AI criteria processing failed on create: {e}")
|
||
return new_item
|
||
|
||
|
||
@app.put("/shipping-types/{item_id}", response_model=ShippingType)
|
||
async def update_shipping_type(item_id: int, item: ShippingTypeUpdate):
|
||
types = load_shipping_types()
|
||
for t in types:
|
||
if t["id"] == item_id:
|
||
old_name = t.get("name", "")
|
||
t.update(item.model_dump())
|
||
save_shipping_types(types)
|
||
|
||
# AI-обработка criteria и сохранение результата
|
||
try:
|
||
type_name = t.get("name", "")
|
||
criteria_text = t.get("criteria", "") or ""
|
||
processed = rag.process_shipping_type_criteria(criteria_text)
|
||
|
||
# если поменяли имя типа — чистим старый ключ
|
||
if isinstance(old_name, str) and old_name.strip() and old_name != type_name:
|
||
mapping = load_processed_shipping_criteria()
|
||
if old_name in mapping:
|
||
mapping.pop(old_name, None)
|
||
save_processed_shipping_criteria(mapping)
|
||
|
||
upsert_processed_shipping_criteria(type_name, processed)
|
||
rag.reload_shipping_types()
|
||
except Exception as e:
|
||
logger.warning(f"AI criteria processing failed on update: {e}")
|
||
return t
|
||
raise HTTPException(status_code=404, detail="Type not found")
|
||
|
||
@app.get("/email-attachments/{session_id}")
|
||
async def get_email_attachments(session_id: str):
|
||
|
||
emails = rag.sessions.get(session_id, [])
|
||
|
||
files = []
|
||
|
||
for email in emails:
|
||
for att in email.get("attachments", []):
|
||
|
||
filename = att.get("filename","")
|
||
ext = filename.split(".")[-1].lower()
|
||
|
||
# игнорируем изображения
|
||
if ext in ["png","jpg","jpeg","gif","bmp","tiff","webp"]:
|
||
continue
|
||
|
||
if att.get("content_base64"):
|
||
files.append({
|
||
"filename": filename,
|
||
"content_base64": att["content_base64"]
|
||
})
|
||
|
||
return files
|
||
|
||
@app.delete("/shipping-types/{item_id}")
|
||
async def delete_shipping_type(item_id: int):
|
||
types = load_shipping_types()
|
||
removed = None
|
||
for t in types:
|
||
if t.get("id") == item_id:
|
||
removed = t
|
||
break
|
||
new_types = [t for t in types if t["id"] != item_id]
|
||
if len(new_types) == len(types):
|
||
raise HTTPException(status_code=404, detail="Type not found")
|
||
save_shipping_types(new_types)
|
||
|
||
# Удаляем обработанные criteria
|
||
try:
|
||
if removed:
|
||
type_name = removed.get("name", "")
|
||
mapping = load_processed_shipping_criteria()
|
||
if type_name in mapping:
|
||
mapping.pop(type_name, None)
|
||
save_processed_shipping_criteria(mapping)
|
||
rag.reload_shipping_types()
|
||
except Exception as e:
|
||
logger.warning(f"Failed to delete processed criteria on delete: {e}")
|
||
return {"ok": True}
|
||
|
||
|
||
if __name__ == "__main__":
|
||
uvicorn.run(app, host="0.0.0.0", port=8000)
|