from fastapi.staticfiles import StaticFiles from fastapi.responses import FileResponse, Response from typing import List, Optional, Dict from fastapi import FastAPI, UploadFile, File, HTTPException, Form, Body from fastapi.responses import JSONResponse from pydantic import BaseModel import uvicorn from rag_engine_gemini import RAGEngineGemini import logging from prometheus_fastapi_instrumentator import Instrumentator import asyncio import hashlib from datetime import datetime from fastapi.middleware.cors import CORSMiddleware import os import json import base64 from urllib.parse import quote logging.basicConfig(level=logging.INFO) logger = logging.getLogger(__name__) os.makedirs("/opt/rag-gemini/app/reports", exist_ok=True) SHIPPING_TYPES_FILE = os.path.join(os.path.dirname(__file__), "shipping_types.json") PROCESSED_SHIPPING_TYPES_FILE = os.path.join( os.path.dirname(__file__), "shipping_types_processed.json" ) class ShippingTypeBase(BaseModel): name: str criteria: str = "" keywords: List[str] = [] employee_email: str = "" confirmation_template: str = "" info_request_template: str = "" class ShippingTypeCreate(ShippingTypeBase): pass class ShippingTypeUpdate(ShippingTypeBase): pass class ShippingType(ShippingTypeBase): id: int def load_shipping_types(): """Загружает типы перевозок с нормализацией ключей""" if not os.path.exists(SHIPPING_TYPES_FILE): return [] with open(SHIPPING_TYPES_FILE, "r", encoding="utf-8") as f: types = json.load(f) # Нормализуем ключи (убираем пробелы) и keywords normalized_types = [] for t in types: normalized = {} for key, value in t.items(): clean_key = key.strip() if clean_key == "keywords" and isinstance(value, list): # Нормализуем keywords - каждый элемент отдельно, убираем пробелы и кавычки normalized[clean_key] = [kw.strip().strip('"').strip("'") for kw in value if kw.strip()] elif isinstance(value, str): normalized[clean_key] = value.strip() else: normalized[clean_key] = value normalized_types.append(normalized) return normalized_types def save_shipping_types(types): """Сохраняет типы перевозок в файл""" with open(SHIPPING_TYPES_FILE, "w", encoding="utf-8") as f: json.dump(types, f, ensure_ascii=False, indent=2) def load_processed_shipping_criteria() -> Dict[str, str]: """Loads AI-processed criteria per shipping type name.""" if not os.path.exists(PROCESSED_SHIPPING_TYPES_FILE): return {} try: with open(PROCESSED_SHIPPING_TYPES_FILE, "r", encoding="utf-8") as f: data = json.load(f) if isinstance(data, dict): return {str(k): str(v) for k, v in data.items() if v is not None} return {} except Exception as e: logger.warning(f"Failed to load processed shipping criteria: {e}") return {} def save_processed_shipping_criteria(mapping: Dict[str, str]) -> None: """Saves AI-processed criteria per shipping type name.""" with open(PROCESSED_SHIPPING_TYPES_FILE, "w", encoding="utf-8") as f: json.dump(mapping, f, ensure_ascii=False, indent=2) def upsert_processed_shipping_criteria(type_name: str, processed_criteria: str) -> None: if not isinstance(type_name, str) or not type_name.strip(): return processed_criteria = processed_criteria if isinstance(processed_criteria, str) else "" mapping = load_processed_shipping_criteria() mapping[type_name] = processed_criteria save_processed_shipping_criteria(mapping) app = FastAPI(title="SEPTEM Cargo RAG System") app.mount("/addin", StaticFiles(directory=os.path.join(os.getcwd(), "addin"), html=True), name="addin") OLD_HOST = "nec.septem.pro" NEW_HOST = "nec.clients.septem.pro" @app.middleware("http") async def redirect_legacy_host(request, call_next): host = request.headers.get("host", "").split(":")[0].lower() if host == OLD_HOST: target_url = f"https://{NEW_HOST}{request.url.path}" if request.url.query: target_url = f"{target_url}?{request.url.query}" return JSONResponse( status_code=308, content={"detail": "Permanent Redirect"}, headers={"Location": target_url}, ) return await call_next(request) @app.get("/addin/taskpane.html") async def get_taskpane(): if os.path.exists("addin/taskpane.html"): return FileResponse("addin/taskpane.html") elif os.path.exists("frontend/taskpane.html"): return FileResponse("frontend/taskpane.html") else: return {"error": "taskpane.html not found"} @app.get("/addin/commands.html") async def get_commands(): if os.path.exists("addin/commands.html"): return FileResponse("addin/commands.html") elif os.path.exists("frontend/commands.html"): return FileResponse("frontend/commands.html") else: return {"error": "commands.html not found"} rag = RAGEngineGemini() Instrumentator().instrument(app).expose(app) app.add_middleware( CORSMiddleware, allow_origins=[ "https://nec.clients.septem.pro", "https://localhost:3000", "http://localhost:8501" ], allow_credentials=True, allow_methods=["*"], allow_headers=["*"], expose_headers=["Content-Disposition", "Content-Length"] ) class EmailAttachment(BaseModel): filename: str size: int content: Optional[str] = None class OutlookEmail(BaseModel): id: str subject: str sender: str senderName: Optional[str] = None body: str body_html: Optional[str] = None receivedTime: Optional[str] = None to: Optional[str] = None cc: Optional[str] = None attachments: List[EmailAttachment] = [] class OutlookEmailsRequest(BaseModel): emails: List[OutlookEmail] session_id: Optional[str] = None class CargoQueryRequest(BaseModel): query: str session_id: Optional[str] = None top_k: int = 10 class CargoQueryResponse(BaseModel): answer: str structured_data: dict sources: list total_emails_analyzed: int class AnalyzeCargoRequest(BaseModel): email_ids: List[str] class CargoLearningRequest(BaseModel): """Сохранение примера для обучения: переписка + структурированный ответ (как в отчёте).""" structured_data: dict session_id: Optional[str] = None context_preview: Optional[str] = None notes: Optional[str] = None @app.post("/record-cargo-learning") async def record_cargo_learning(req: CargoLearningRequest): """ Записывает пару «контекст писем → JSON отчёта» в локальное хранилище few-shot. Если передан session_id, текст писем подставляется из текущей сессии сервера. """ try: ok = rag.record_cargo_learning( structured_data=req.structured_data, session_id=req.session_id, context_preview=req.context_preview, notes=req.notes, ) return {"status": "ok" if ok else "skipped", "stored": ok} except Exception as e: logger.error(f"record-cargo-learning error: {e}") raise HTTPException(status_code=500, detail=str(e)) @app.post("/process-outlook-emails") async def process_outlook_emails(request: OutlookEmailsRequest): try: session_id = await rag.process_outlook_emails( [email.model_dump() for email in request.emails], session_id=request.session_id ) return { "status": "success", "session_id": session_id, "emails_processed": len(request.emails) } except Exception as e: logger.error(f"Process Outlook emails error: {e}") raise HTTPException(status_code=500, detail=str(e)) @app.post("/query-cargo", response_model=CargoQueryResponse) async def query_cargo(request: CargoQueryRequest): try: result = await rag.query_cargo_info( request.query, request.session_id, request.top_k ) return result except Exception as e: logger.error(f"Cargo query error: {e}") raise HTTPException(status_code=500, detail=str(e)) @app.post("/generate-cargo-report") async def generate_cargo_report(session_id: str = Body(..., embed=True)): try: result = await rag.generate_cargo_report(session_id) return result except Exception as e: logger.error(f"Generate report error: {e}") raise HTTPException(status_code=500, detail=str(e)) @app.get("/email-sessions/{session_id}") async def get_session_info(session_id: str): return { "session_id": session_id, "created_at": datetime.now().isoformat(), "emails_count": 10 } @app.post("/upload", include_in_schema=False) async def upload_document(file: UploadFile = File(...)): try: content = await file.read() doc_id = hashlib.md5(file.filename.encode()).hexdigest() asyncio.create_task(rag.process_document(content, file.filename)) return {"status": "processing", "document_id": doc_id} except Exception as e: logger.error(f"Upload error: {e}") raise HTTPException(status_code=500, detail=str(e)) @app.get("/health") async def health(): return {"status": "healthy"} # ============================================================================= # 🔥 ENDPOINT ДЛЯ ПРОСМОТРА ВЛОЖЕНИЙ В БРАУЗЕРЕ (ИСПРАВЛЕННЫЙ) # ============================================================================= @app.get("/attachments/{session_id}/{email_index}/{attachment_index}") async def get_attachment(session_id: str, email_index: int, attachment_index: int): """ Возвращает оригинальный файл вложения для просмотра в браузере. Поддерживает PDF, DOCX, XLSX, изображения и другие форматы. Корректно обрабатывает русские имена файлов (RFC 5987). """ att = rag.get_attachment(session_id, email_index, attachment_index) if att is None: raise HTTPException(status_code=404, detail="Attachment not found") filename = att.get("filename", "attachment") content_base64 = att.get("content_base64") # 🔥 Функция для безопасного формирования заголовка Content-Disposition def make_content_disposition(filename: str, disposition: str = "inline") -> str: """ Формирует Content-Disposition с поддержкой UTF-8 имён файлов (RFC 5987). Совместимо со старыми и новыми браузерами. """ # ASCII-версия для старых браузеров (fallback) ascii_filename = filename.encode('ascii', 'ignore').decode('ascii') or 'attachment' # UTF-8 версия с URL-кодированием для современных браузеров (RFC 5987) utf8_filename = quote(filename, safe='') return f'{disposition}; filename="{ascii_filename}"; filename*=UTF-8\'\'{utf8_filename}' # Если нет оригинального содержимого, возвращаем извлечённый текст if not content_base64: text = att.get("text", "") return Response( content=text, media_type="text/plain; charset=utf-8", headers={ "Content-Disposition": make_content_disposition(filename + ".txt"), "Content-Length": str(len(text.encode('utf-8'))), "Access-Control-Expose-Headers": "Content-Disposition, Content-Length" } ) # 🔥 Декодируем base64 и определяем MIME тип по расширению try: file_content = base64.b64decode(content_base64) # Определяем MIME тип по расширению файла ext = filename.lower().split('.')[-1] if '.' in filename else '' mime_types = { 'pdf': 'application/pdf', 'doc': 'application/msword', 'docx': 'application/vnd.openxmlformats-officedocument.wordprocessingml.document', 'xls': 'application/vnd.ms-excel', 'xlsx': 'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet', 'txt': 'text/plain', 'csv': 'text/csv', 'png': 'image/png', 'jpg': 'image/jpeg', 'jpeg': 'image/jpeg', 'gif': 'image/gif', 'bmp': 'image/bmp', 'zip': 'application/zip', 'rar': 'application/vnd.rar', 'ppt': 'application/vnd.ms-powerpoint', 'pptx': 'application/vnd.openxmlformats-officedocument.presentationml.presentation', 'rtf': 'application/rtf', 'xml': 'application/xml', 'json': 'application/json', } media_type = mime_types.get(ext, 'application/octet-stream') # 🔥 Возвращаем файл с правильным заголовком Content-Disposition return Response( content=file_content, media_type=media_type, headers={ "Content-Disposition": make_content_disposition(filename), "Content-Length": str(len(file_content)), "Access-Control-Expose-Headers": "Content-Disposition, Content-Length" } ) except Exception as e: logger.error(f"Error serving attachment: {e}") raise HTTPException(status_code=500, detail=f"Error processing file: {str(e)}") @app.get("/shipping-types", response_model=List[ShippingType]) async def get_shipping_types(): return load_shipping_types() @app.post("/shipping-types", response_model=ShippingType) async def create_shipping_type(item: ShippingTypeCreate): types = load_shipping_types() new_id = max([t["id"] for t in types], default=0) + 1 new_item = item.model_dump() new_item["id"] = new_id types.append(new_item) save_shipping_types(types) # AI-обработка criteria и сохранение результата try: type_name = new_item.get("name", "") criteria_text = new_item.get("criteria", "") or "" processed = rag.process_shipping_type_criteria(criteria_text) upsert_processed_shipping_criteria(type_name, processed) rag.reload_shipping_types() except Exception as e: logger.warning(f"AI criteria processing failed on create: {e}") return new_item @app.put("/shipping-types/{item_id}", response_model=ShippingType) async def update_shipping_type(item_id: int, item: ShippingTypeUpdate): types = load_shipping_types() for t in types: if t["id"] == item_id: old_name = t.get("name", "") t.update(item.model_dump()) save_shipping_types(types) # AI-обработка criteria и сохранение результата try: type_name = t.get("name", "") criteria_text = t.get("criteria", "") or "" processed = rag.process_shipping_type_criteria(criteria_text) # если поменяли имя типа — чистим старый ключ if isinstance(old_name, str) and old_name.strip() and old_name != type_name: mapping = load_processed_shipping_criteria() if old_name in mapping: mapping.pop(old_name, None) save_processed_shipping_criteria(mapping) upsert_processed_shipping_criteria(type_name, processed) rag.reload_shipping_types() except Exception as e: logger.warning(f"AI criteria processing failed on update: {e}") return t raise HTTPException(status_code=404, detail="Type not found") @app.get("/email-attachments/{session_id}") async def get_email_attachments(session_id: str): emails = rag.sessions.get(session_id, []) files = [] for email in emails: for att in email.get("attachments", []): filename = att.get("filename","") ext = filename.split(".")[-1].lower() # игнорируем изображения if ext in ["png","jpg","jpeg","gif","bmp","tiff","webp"]: continue if att.get("content_base64"): files.append({ "filename": filename, "content_base64": att["content_base64"] }) return files @app.delete("/shipping-types/{item_id}") async def delete_shipping_type(item_id: int): types = load_shipping_types() removed = None for t in types: if t.get("id") == item_id: removed = t break new_types = [t for t in types if t["id"] != item_id] if len(new_types) == len(types): raise HTTPException(status_code=404, detail="Type not found") save_shipping_types(new_types) # Удаляем обработанные criteria try: if removed: type_name = removed.get("name", "") mapping = load_processed_shipping_criteria() if type_name in mapping: mapping.pop(type_name, None) save_processed_shipping_criteria(mapping) rag.reload_shipping_types() except Exception as e: logger.warning(f"Failed to delete processed criteria on delete: {e}") return {"ok": True} if __name__ == "__main__": uvicorn.run(app, host="0.0.0.0", port=8000)