NEKReport/main(1).py

503 lines
18 KiB
Python
Raw Permalink Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

from fastapi.staticfiles import StaticFiles
from fastapi.responses import FileResponse, Response
from typing import List, Optional, Dict
from fastapi import FastAPI, UploadFile, File, HTTPException, Form, Body
from fastapi.responses import JSONResponse
from pydantic import BaseModel
import uvicorn
from rag_engine_gemini import RAGEngineGemini
import logging
from prometheus_fastapi_instrumentator import Instrumentator
import asyncio
import hashlib
from datetime import datetime
from fastapi.middleware.cors import CORSMiddleware
import os
import json
import base64
from urllib.parse import quote
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)
os.makedirs("/opt/rag-gemini/app/reports", exist_ok=True)
SHIPPING_TYPES_FILE = os.path.join(os.path.dirname(__file__), "shipping_types.json")
PROCESSED_SHIPPING_TYPES_FILE = os.path.join(
os.path.dirname(__file__), "shipping_types_processed.json"
)
class ShippingTypeBase(BaseModel):
name: str
criteria: str = ""
keywords: List[str] = []
employee_email: str = ""
confirmation_template: str = ""
info_request_template: str = ""
class ShippingTypeCreate(ShippingTypeBase):
pass
class ShippingTypeUpdate(ShippingTypeBase):
pass
class ShippingType(ShippingTypeBase):
id: int
def load_shipping_types():
"""Загружает типы перевозок с нормализацией ключей"""
if not os.path.exists(SHIPPING_TYPES_FILE):
return []
with open(SHIPPING_TYPES_FILE, "r", encoding="utf-8") as f:
types = json.load(f)
# Нормализуем ключи (убираем пробелы) и keywords
normalized_types = []
for t in types:
normalized = {}
for key, value in t.items():
clean_key = key.strip()
if clean_key == "keywords" and isinstance(value, list):
# Нормализуем keywords - каждый элемент отдельно, убираем пробелы и кавычки
normalized[clean_key] = [kw.strip().strip('"').strip("'") for kw in value if kw.strip()]
elif isinstance(value, str):
normalized[clean_key] = value.strip()
else:
normalized[clean_key] = value
normalized_types.append(normalized)
return normalized_types
def save_shipping_types(types):
"""Сохраняет типы перевозок в файл"""
with open(SHIPPING_TYPES_FILE, "w", encoding="utf-8") as f:
json.dump(types, f, ensure_ascii=False, indent=2)
def load_processed_shipping_criteria() -> Dict[str, str]:
"""Loads AI-processed criteria per shipping type name."""
if not os.path.exists(PROCESSED_SHIPPING_TYPES_FILE):
return {}
try:
with open(PROCESSED_SHIPPING_TYPES_FILE, "r", encoding="utf-8") as f:
data = json.load(f)
if isinstance(data, dict):
return {str(k): str(v) for k, v in data.items() if v is not None}
return {}
except Exception as e:
logger.warning(f"Failed to load processed shipping criteria: {e}")
return {}
def save_processed_shipping_criteria(mapping: Dict[str, str]) -> None:
"""Saves AI-processed criteria per shipping type name."""
with open(PROCESSED_SHIPPING_TYPES_FILE, "w", encoding="utf-8") as f:
json.dump(mapping, f, ensure_ascii=False, indent=2)
def upsert_processed_shipping_criteria(type_name: str, processed_criteria: str) -> None:
if not isinstance(type_name, str) or not type_name.strip():
return
processed_criteria = processed_criteria if isinstance(processed_criteria, str) else ""
mapping = load_processed_shipping_criteria()
mapping[type_name] = processed_criteria
save_processed_shipping_criteria(mapping)
app = FastAPI(title="SEPTEM Cargo RAG System")
app.mount("/addin", StaticFiles(directory=os.path.join(os.getcwd(), "addin"), html=True), name="addin")
OLD_HOST = "nec.septem.pro"
NEW_HOST = "nec.clients.septem.pro"
@app.middleware("http")
async def redirect_legacy_host(request, call_next):
host = request.headers.get("host", "").split(":")[0].lower()
if host == OLD_HOST:
target_url = f"https://{NEW_HOST}{request.url.path}"
if request.url.query:
target_url = f"{target_url}?{request.url.query}"
return JSONResponse(
status_code=308,
content={"detail": "Permanent Redirect"},
headers={"Location": target_url},
)
return await call_next(request)
@app.get("/addin/taskpane.html")
async def get_taskpane():
if os.path.exists("addin/taskpane.html"):
return FileResponse("addin/taskpane.html")
elif os.path.exists("frontend/taskpane.html"):
return FileResponse("frontend/taskpane.html")
else:
return {"error": "taskpane.html not found"}
@app.get("/addin/commands.html")
async def get_commands():
if os.path.exists("addin/commands.html"):
return FileResponse("addin/commands.html")
elif os.path.exists("frontend/commands.html"):
return FileResponse("frontend/commands.html")
else:
return {"error": "commands.html not found"}
rag = RAGEngineGemini()
Instrumentator().instrument(app).expose(app)
app.add_middleware(
CORSMiddleware,
allow_origins=[
"https://nec.clients.septem.pro",
"https://localhost:3000",
"http://localhost:8501"
],
allow_credentials=True,
allow_methods=["*"],
allow_headers=["*"],
expose_headers=["Content-Disposition", "Content-Length"]
)
class EmailAttachment(BaseModel):
filename: str
size: int
content: Optional[str] = None
class OutlookEmail(BaseModel):
id: str
subject: str
sender: str
senderName: Optional[str] = None
body: str
body_html: Optional[str] = None
receivedTime: Optional[str] = None
to: Optional[str] = None
cc: Optional[str] = None
attachments: List[EmailAttachment] = []
class OutlookEmailsRequest(BaseModel):
emails: List[OutlookEmail]
session_id: Optional[str] = None
class CargoQueryRequest(BaseModel):
query: str
session_id: Optional[str] = None
top_k: int = 10
class CargoQueryResponse(BaseModel):
answer: str
structured_data: dict
sources: list
total_emails_analyzed: int
class AnalyzeCargoRequest(BaseModel):
email_ids: List[str]
class CargoLearningRequest(BaseModel):
"""Сохранение примера для обучения: переписка + структурированный ответ (как в отчёте)."""
structured_data: dict
session_id: Optional[str] = None
context_preview: Optional[str] = None
notes: Optional[str] = None
@app.post("/record-cargo-learning")
async def record_cargo_learning(req: CargoLearningRequest):
"""
Записывает пару «контекст писем → JSON отчёта» в локальное хранилище few-shot.
Если передан session_id, текст писем подставляется из текущей сессии сервера.
"""
try:
ok = rag.record_cargo_learning(
structured_data=req.structured_data,
session_id=req.session_id,
context_preview=req.context_preview,
notes=req.notes,
)
return {"status": "ok" if ok else "skipped", "stored": ok}
except Exception as e:
logger.error(f"record-cargo-learning error: {e}")
raise HTTPException(status_code=500, detail=str(e))
@app.post("/process-outlook-emails")
async def process_outlook_emails(request: OutlookEmailsRequest):
try:
session_id = await rag.process_outlook_emails(
[email.model_dump() for email in request.emails],
session_id=request.session_id
)
return {
"status": "success",
"session_id": session_id,
"emails_processed": len(request.emails)
}
except Exception as e:
logger.error(f"Process Outlook emails error: {e}")
raise HTTPException(status_code=500, detail=str(e))
@app.post("/query-cargo", response_model=CargoQueryResponse)
async def query_cargo(request: CargoQueryRequest):
try:
result = await rag.query_cargo_info(
request.query,
request.session_id,
request.top_k
)
return result
except Exception as e:
logger.error(f"Cargo query error: {e}")
raise HTTPException(status_code=500, detail=str(e))
@app.post("/generate-cargo-report")
async def generate_cargo_report(session_id: str = Body(..., embed=True)):
try:
result = await rag.generate_cargo_report(session_id)
return result
except Exception as e:
logger.error(f"Generate report error: {e}")
raise HTTPException(status_code=500, detail=str(e))
@app.get("/email-sessions/{session_id}")
async def get_session_info(session_id: str):
return {
"session_id": session_id,
"created_at": datetime.now().isoformat(),
"emails_count": 10
}
@app.post("/upload", include_in_schema=False)
async def upload_document(file: UploadFile = File(...)):
try:
content = await file.read()
doc_id = hashlib.md5(file.filename.encode()).hexdigest()
asyncio.create_task(rag.process_document(content, file.filename))
return {"status": "processing", "document_id": doc_id}
except Exception as e:
logger.error(f"Upload error: {e}")
raise HTTPException(status_code=500, detail=str(e))
@app.get("/health")
async def health():
return {"status": "healthy"}
# =============================================================================
# 🔥 ENDPOINT ДЛЯ ПРОСМОТРА ВЛОЖЕНИЙ В БРАУЗЕРЕ (ИСПРАВЛЕННЫЙ)
# =============================================================================
@app.get("/attachments/{session_id}/{email_index}/{attachment_index}")
async def get_attachment(session_id: str, email_index: int, attachment_index: int):
"""
Возвращает оригинальный файл вложения для просмотра в браузере.
Поддерживает PDF, DOCX, XLSX, изображения и другие форматы.
Корректно обрабатывает русские имена файлов (RFC 5987).
"""
att = rag.get_attachment(session_id, email_index, attachment_index)
if att is None:
raise HTTPException(status_code=404, detail="Attachment not found")
filename = att.get("filename", "attachment")
content_base64 = att.get("content_base64")
# 🔥 Функция для безопасного формирования заголовка Content-Disposition
def make_content_disposition(filename: str, disposition: str = "inline") -> str:
"""
Формирует Content-Disposition с поддержкой UTF-8 имён файлов (RFC 5987).
Совместимо со старыми и новыми браузерами.
"""
# ASCII-версия для старых браузеров (fallback)
ascii_filename = filename.encode('ascii', 'ignore').decode('ascii') or 'attachment'
# UTF-8 версия с URL-кодированием для современных браузеров (RFC 5987)
utf8_filename = quote(filename, safe='')
return f'{disposition}; filename="{ascii_filename}"; filename*=UTF-8\'\'{utf8_filename}'
# Если нет оригинального содержимого, возвращаем извлечённый текст
if not content_base64:
text = att.get("text", "")
return Response(
content=text,
media_type="text/plain; charset=utf-8",
headers={
"Content-Disposition": make_content_disposition(filename + ".txt"),
"Content-Length": str(len(text.encode('utf-8'))),
"Access-Control-Expose-Headers": "Content-Disposition, Content-Length"
}
)
# 🔥 Декодируем base64 и определяем MIME тип по расширению
try:
file_content = base64.b64decode(content_base64)
# Определяем MIME тип по расширению файла
ext = filename.lower().split('.')[-1] if '.' in filename else ''
mime_types = {
'pdf': 'application/pdf',
'doc': 'application/msword',
'docx': 'application/vnd.openxmlformats-officedocument.wordprocessingml.document',
'xls': 'application/vnd.ms-excel',
'xlsx': 'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet',
'txt': 'text/plain',
'csv': 'text/csv',
'png': 'image/png',
'jpg': 'image/jpeg',
'jpeg': 'image/jpeg',
'gif': 'image/gif',
'bmp': 'image/bmp',
'zip': 'application/zip',
'rar': 'application/vnd.rar',
'ppt': 'application/vnd.ms-powerpoint',
'pptx': 'application/vnd.openxmlformats-officedocument.presentationml.presentation',
'rtf': 'application/rtf',
'xml': 'application/xml',
'json': 'application/json',
}
media_type = mime_types.get(ext, 'application/octet-stream')
# 🔥 Возвращаем файл с правильным заголовком Content-Disposition
return Response(
content=file_content,
media_type=media_type,
headers={
"Content-Disposition": make_content_disposition(filename),
"Content-Length": str(len(file_content)),
"Access-Control-Expose-Headers": "Content-Disposition, Content-Length"
}
)
except Exception as e:
logger.error(f"Error serving attachment: {e}")
raise HTTPException(status_code=500, detail=f"Error processing file: {str(e)}")
@app.get("/shipping-types", response_model=List[ShippingType])
async def get_shipping_types():
return load_shipping_types()
@app.post("/shipping-types", response_model=ShippingType)
async def create_shipping_type(item: ShippingTypeCreate):
types = load_shipping_types()
new_id = max([t["id"] for t in types], default=0) + 1
new_item = item.model_dump()
new_item["id"] = new_id
types.append(new_item)
save_shipping_types(types)
# AI-обработка criteria и сохранение результата
try:
type_name = new_item.get("name", "")
criteria_text = new_item.get("criteria", "") or ""
processed = rag.process_shipping_type_criteria(criteria_text)
upsert_processed_shipping_criteria(type_name, processed)
rag.reload_shipping_types()
except Exception as e:
logger.warning(f"AI criteria processing failed on create: {e}")
return new_item
@app.put("/shipping-types/{item_id}", response_model=ShippingType)
async def update_shipping_type(item_id: int, item: ShippingTypeUpdate):
types = load_shipping_types()
for t in types:
if t["id"] == item_id:
old_name = t.get("name", "")
t.update(item.model_dump())
save_shipping_types(types)
# AI-обработка criteria и сохранение результата
try:
type_name = t.get("name", "")
criteria_text = t.get("criteria", "") or ""
processed = rag.process_shipping_type_criteria(criteria_text)
# если поменяли имя типа — чистим старый ключ
if isinstance(old_name, str) and old_name.strip() and old_name != type_name:
mapping = load_processed_shipping_criteria()
if old_name in mapping:
mapping.pop(old_name, None)
save_processed_shipping_criteria(mapping)
upsert_processed_shipping_criteria(type_name, processed)
rag.reload_shipping_types()
except Exception as e:
logger.warning(f"AI criteria processing failed on update: {e}")
return t
raise HTTPException(status_code=404, detail="Type not found")
@app.get("/email-attachments/{session_id}")
async def get_email_attachments(session_id: str):
emails = rag.sessions.get(session_id, [])
files = []
for email in emails:
for att in email.get("attachments", []):
filename = att.get("filename","")
ext = filename.split(".")[-1].lower()
# игнорируем изображения
if ext in ["png","jpg","jpeg","gif","bmp","tiff","webp"]:
continue
if att.get("content_base64"):
files.append({
"filename": filename,
"content_base64": att["content_base64"]
})
return files
@app.delete("/shipping-types/{item_id}")
async def delete_shipping_type(item_id: int):
types = load_shipping_types()
removed = None
for t in types:
if t.get("id") == item_id:
removed = t
break
new_types = [t for t in types if t["id"] != item_id]
if len(new_types) == len(types):
raise HTTPException(status_code=404, detail="Type not found")
save_shipping_types(new_types)
# Удаляем обработанные criteria
try:
if removed:
type_name = removed.get("name", "")
mapping = load_processed_shipping_criteria()
if type_name in mapping:
mapping.pop(type_name, None)
save_processed_shipping_criteria(mapping)
rag.reload_shipping_types()
except Exception as e:
logger.warning(f"Failed to delete processed criteria on delete: {e}")
return {"ok": True}
if __name__ == "__main__":
uvicorn.run(app, host="0.0.0.0", port=8000)