From 2e8ff517aaa412305dca5c3816a98e44ce132146 Mon Sep 17 00:00:00 2001
From: Akhil Meka
Date: Mon, 8 Sep 2025 10:39:02 +0530
Subject: [PATCH] feat: rts file validation

---
 package.json                         |   2 +
 pnpm-lock.yaml                       |  39 ++++++
 src/fileValidation/ai.ts             | 177 +++++++++++++++++++++++++++
 src/fileValidation/validate.route.ts |  32 +++++
 src/fileValidation/validate.ts       | 100 +++++++++++++++
 src/note/note.service.ts             |  23 ++++
 src/routes.ts                        |   2 +
 7 files changed, 375 insertions(+)
 create mode 100644 src/fileValidation/ai.ts
 create mode 100644 src/fileValidation/validate.route.ts
 create mode 100644 src/fileValidation/validate.ts

diff --git a/package.json b/package.json
index dcc807e..d256de2 100644
--- a/package.json
+++ b/package.json
@@ -31,6 +31,8 @@
     "fastify-zod": "^1.4.0",
     "lru-cache": "^11.0.2",
     "mongoose": "^8.9.0",
+    "openai": "^5.19.1",
+    "pdfreader": "^3.0.7",
     "qs": "^6.14.0",
     "zod": "^3.24.1"
   },
diff --git a/pnpm-lock.yaml b/pnpm-lock.yaml
index 15f6ac4..bcbd5f6 100644
--- a/pnpm-lock.yaml
+++ b/pnpm-lock.yaml
@@ -59,6 +59,12 @@ importers:
       mongoose:
         specifier: ^8.9.0
         version: 8.9.0
+      openai:
+        specifier: ^5.19.1
+        version: 5.19.1(zod@3.24.1)
+      pdfreader:
+        specifier: ^3.0.7
+        version: 3.0.7
       qs:
         specifier: ^6.14.0
         version: 6.14.0
@@ -973,6 +979,18 @@ packages:
   once@1.4.0:
     resolution: {integrity: sha512-lNaJgI+2Q5URQBkccEKHTQOPaXdUxnZZElQTZY0MFUAuaEqe1E+Nyvgdz/aIyNi6Z9MzO5dv1H8n58/GELp3+w==}
 
+  openai@5.19.1:
+    resolution: {integrity: sha512-zSqnUF7oR9ksmpusKkpUgkNrj8Sl57U+OyzO8jzc7LUjTMg4DRfR3uCm+EIMA6iw06sRPNp4t7ojp3sCpEUZRQ==}
+    hasBin: true
+    peerDependencies:
+      ws: ^8.18.0
+      zod: ^3.23.8
+    peerDependenciesMeta:
+      ws:
+        optional: true
+      zod:
+        optional: true
+
   openapi-types@12.1.3:
     resolution: {integrity: sha512-N4YtSYJqghVu4iek2ZUvcN/0aqH1kRDuNqzcycDxhOUpg7GdvLa2F3DgS6yBNhInhv2r/6I0Flkn7CqL8+nIcw==}
 
@@ -989,6 +1007,17 @@ packages:
   path-case@3.0.4:
     resolution: {integrity: sha512-qO4qCFjXqVTrcbPt/hQfhTQ+VhFsqNKOPtytgNKkKxSoEp3XPUQ8ObFuePylOIok5gjn69ry8XiULxCwot3Wfg==}
 
+  pdf2json@3.1.4:
+    resolution: {integrity: sha512-rS+VapXpXZr+5lUpHmRh3ugXdFXp24p1RyG24yP1DMpqP4t0mrYNGpLtpSbWD42PnQ59GIXofxF+yWb7M+3THg==}
+    engines: {node: '>=18.12.1', npm: '>=8.19.2'}
+    hasBin: true
+    bundledDependencies:
+      - '@xmldom/xmldom'
+
+  pdfreader@3.0.7:
+    resolution: {integrity: sha512-68Htw7su6HDJGGKv9tkjilRyf8zaHulEKRCgCwx4FE8krcMB8iBtM46Smjjez0jFm45dUKYXJzThyLwCqfQlCQ==}
+    engines: {node: '>=14'}
+
   pino-abstract-transport@2.0.0:
     resolution: {integrity: sha512-F63x5tizV6WCh4R6RHyi2Ml+M70DNRXt/+HANowMflpgGFMAym/VKm6G7ZOQRjqN7XbGxK1Lg9t6ZrtzOaivMw==}
 
@@ -2652,6 +2681,10 @@ snapshots:
     dependencies:
       wrappy: 1.0.2
 
+  openai@5.19.1(zod@3.24.1):
+    optionalDependencies:
+      zod: 3.24.1
+
   openapi-types@12.1.3: {}
 
   p-limit@3.1.0:
@@ -2673,6 +2706,12 @@ snapshots:
       dot-case: 3.0.4
       tslib: 2.8.1
 
+  pdf2json@3.1.4: {}
+
+  pdfreader@3.0.7:
+    dependencies:
+      pdf2json: 3.1.4
+
   pino-abstract-transport@2.0.0:
     dependencies:
       split2: 4.2.0
diff --git a/src/fileValidation/ai.ts b/src/fileValidation/ai.ts
new file mode 100644
index 0000000..1332133
--- /dev/null
+++ b/src/fileValidation/ai.ts
@@ -0,0 +1,177 @@
+import OpenAI from "openai";
+import fsp from "fs/promises";
+import { PdfReader } from "pdfreader";
+
+const openaiClient = new OpenAI({
+  apiKey: process.env.OPENAI_API_KEY,
+});
+
+/**
+ * Sends `prompt` to the chat completions API in JSON mode and returns the
+ * parsed JSON object from the first choice.
+ *
+ * @throws Error when the response carries no message content.
+ */
+async function queryChatGPT(
+  prompt: string
+): Promise<Record<string, unknown>> {
+  const res = await openaiClient.chat.completions.create({
+    model: "gpt-4o",
+    messages: [
+      {
+        role: "system",
+        content: prompt,
+      },
+    ],
+    response_format: { type: "json_object" },
+  });
+
+  // `content` is nullable; JSON.parse(null) would return null and make the
+  // callers' Object.keys()/destructuring throw far from the real cause.
+  const content = res.choices[0]?.message.content;
+  if (!content) {
+    throw new Error("OpenAI returned an empty completion");
+  }
+
+  return JSON.parse(content);
+}
+
+/** Concatenates every text item pdfreader emits for `buffer`. */
+async function parseBufferAsync(buffer: Buffer): Promise<string> {
+  let parsedData = "";
+
+  return new Promise((resolve, reject) => {
+    new PdfReader().parseBuffer(buffer, (error, item) => {
+      if (error) return reject(error);
+      // A falsy item marks the end of the document.
+      if (!item) return resolve(parsedData);
+      if (item.text) parsedData += item.text;
+    });
+  });
+}
+
+/**
+ * Builds a validation note for the PDF files directly inside `folderPath`.
+ *
+ * The first file whose text mentions "general contractors" is used as the
+ * checklist and the first mentioning "energy efficiency" as the energy
+ * document; fields are extracted from each via the model. Every other file
+ * is then compared against the checklist's "Site Address".
+ *
+ * @param folderPath Directory containing the downloaded PDFs.
+ * @returns Plain-text note: missing documents, files whose address does not
+ *   match the checklist, and the extracted data as JSON.
+ */
+export default async function generateNote(
+  folderPath: string
+): Promise<string> {
+  let energyData: Record<string, unknown> = {};
+  let checklistData: Record<string, unknown> = {};
+  let energyDataFlag = false;
+  let checklistDataFlag = false;
+  let checklistFilePath = "";
+
+  // Skip sub-folders: readFile on a directory rejects with EISDIR, which
+  // would abort the note for the whole folder.
+  const entries = await fsp.readdir(folderPath, { withFileTypes: true });
+  const files = entries
+    .filter((entry) => entry.isFile())
+    .map((entry) => entry.name);
+
+  // Parsed text per file, reused by the address check so every PDF is read
+  // and parsed only once.
+  const pdfTextByFile = new Map<string, string>();
+
+  for (const file of files) {
+    const input = await fsp.readFile(`${folderPath}/${file}`);
+    const pdfData = await parseBufferAsync(input);
+    pdfTextByFile.set(file, pdfData);
+    const loweredText = pdfData.toLowerCase();
+
+    if (
+      Object.keys(checklistData).length === 0 &&
+      loweredText.includes("general contractors")
+    ) {
+      const prompt = `
+        Extract the following fields and contractor info from the given data and return the response in JSON.
+        Fields: ["Site Address", "Lot Number", "Estimated Construction Cost", "Total Square Footage", "A/C Square Footage", "Number of Bedrooms", "Number of Bathrooms", "Construction Type", "Number of Stories", "Height", "General Contractors License", "Mechanical Contractors License", "Electrical Contractors License", "Plumbing Contractors License", "Roofing Contractors License", "Concrete/Mason Contractors License", "Confirm the following"]
+
+        Data: ${pdfData}
+      `;
+
+      checklistDataFlag = true;
+      checklistFilePath = file;
+      checklistData = await queryChatGPT(prompt);
+    }
+
+    if (
+      Object.keys(energyData).length === 0 &&
+      loweredText.includes("energy efficiency")
+    ) {
+      const prompt = `
+        Extract the following fields form the given data and return the response in JSON.
+        Fields: [Project Name, address, county]
+
+        Data: ${pdfData}
+      `;
+
+      energyDataFlag = true;
+      energyData = await queryChatGPT(prompt);
+    }
+  }
+
+  const addressCheck: Record<string, boolean> = {};
+
+  if (checklistDataFlag && Object.keys(checklistData).length > 0) {
+    for (const file of files) {
+      if (file === checklistFilePath) continue;
+
+      const prompt = `
+        Check if the address in the data given below matches this ${checklistData["Site Address"]}. Return the response in JSON with schema {match: Boolean}.
+
+        Data: ${pdfTextByFile.get(file) ?? ""}
+      `;
+
+      const { match } = await queryChatGPT(prompt);
+      addressCheck[file] = Boolean(match);
+    }
+  }
+
+  let note = "";
+
+  if (!energyDataFlag) {
+    note += "Energy Efficiency document not found.\n";
+  }
+
+  if (!checklistDataFlag) {
+    note += `Checklist document not found.\n\n`;
+  }
+
+  let filesList = "";
+  for (const [file, matched] of Object.entries(addressCheck)) {
+    if (!matched) {
+      filesList += file.split("/").pop() + "\n";
+    }
+  }
+
+  if (filesList !== "") {
+    note +=
+      "Below files don't have address or the address doesn't match with the address in checklist\n\n";
+    note += filesList;
+    note += "\n";
+  }
+
+  if (energyDataFlag || checklistDataFlag) {
+    note += "\nExtracted Data:\n";
+  }
+
+  if (Object.keys(energyData).length > 0) {
+    note += `\n${JSON.stringify(energyData, null, 2)}\n`;
+  }
+
+  if (Object.keys(checklistData).length > 0) {
+    note += `\n${JSON.stringify(checklistData, null, 2)}\n`;
+  }
+
+  return note;
+}
diff --git a/src/fileValidation/validate.route.ts b/src/fileValidation/validate.route.ts
new file mode 100644
index 0000000..2ae7ad8
--- /dev/null
+++ b/src/fileValidation/validate.route.ts
@@ -0,0 +1,32 @@
+import { FastifyInstance, FastifyReply, FastifyRequest } from "fastify";
+import { validate } from "./validate";
+
+/**
+ * Registers `POST /:rtsId`, which starts file validation for the record in
+ * the background and responds immediately.
+ */
+export async function validateRoutes(fastify: FastifyInstance) {
+  fastify.post(
+    "/:rtsId",
+    {
+      schema: {
+        params: {
+          type: "object",
+          properties: { rtsId: { type: "string" } },
+        },
+      },
+    },
+    async (req: FastifyRequest, res: FastifyReply) => {
+      const { rtsId } = req.params as { rtsId: string };
+
+      // Not awaited on purpose: validation downloads files and calls the
+      // model. The catch keeps a rejection from becoming an unhandled
+      // promise rejection, which a surrounding try/catch cannot intercept.
+      validate(rtsId, req.user.tenantId).catch((err: unknown) => {
+        req.log.error(err);
+      });
+
+      return res.code(200).send();
+    }
+  );
+}
diff --git a/src/fileValidation/validate.ts b/src/fileValidation/validate.ts
new file mode 100644
index 0000000..e63728f
--- /dev/null
+++ b/src/fileValidation/validate.ts
@@ -0,0 +1,100 @@
+import fsp from "fs/promises";
+import axios from "axios";
+import { rtsModel } from "../rts/rts.schema";
+import { getChildren } from "../file/file.service";
+import { getFileUrlS3 } from "../utils/s3";
+import generateNote from "./ai";
+import { createNote, createNoteBot } from "../note/note.service";
+
+/**
+ * Streams `url` into `downloadPath`. Best effort: a failed download is
+ * logged and skipped so one bad file does not abort the whole tree.
+ */
+async function downloadFile(url: string, downloadPath: string) {
+  try {
+    const res = await axios({
+      url: url,
+      method: "GET",
+      responseType: "stream",
+    });
+
+    await fsp.writeFile(downloadPath, res.data);
+  } catch (err) {
+    console.error(`Failed to download ${downloadPath}`, err);
+  }
+}
+
+/**
+ * Recursively downloads everything under `recId` into `basePath`, creating
+ * one local directory per item whose mimeType is "folder".
+ */
+async function downloadFileTree(
+  basePath: string,
+  recId: string,
+  tenantId: string
+) {
+  async function downloadFolder(folderId: string, path: string) {
+    const items = await getChildren(folderId, tenantId);
+    for (const item of items) {
+      // NOTE(review): item.name is joined into the path as-is; confirm it
+      // can never contain "/" or "..".
+      const itemPath = `${path}/${item.name}`;
+      if (item.mimeType === "folder") {
+        await fsp.mkdir(itemPath, { recursive: true });
+        await downloadFolder(item.pid, itemPath);
+      } else {
+        await downloadFile(
+          await getFileUrlS3(item.pid, null, false),
+          itemPath
+        );
+      }
+    }
+  }
+
+  await downloadFolder(recId, basePath);
+}
+
+/** Removes `path` and everything below it; a missing path is not an error. */
+async function deleteFolder(path: string) {
+  await fsp.rm(path, { recursive: true, force: true });
+}
+
+/**
+ * Downloads the record's file tree, generates one note per top-level folder
+ * and stores the combined text as a bot note on the record.
+ *
+ * Errors are logged, not thrown; the temporary folder is always removed.
+ */
+export async function validate(recId: string, tenantId: string) {
+  const basePath = process.env.BASE_PATH || "/root/tmp/quickerPermit";
+  const folderPath = basePath + `/${recId}`;
+
+  try {
+    // recursive: also creates the base path when it does not exist yet.
+    await fsp.mkdir(folderPath, { recursive: true });
+    await downloadFileTree(folderPath, recId, tenantId);
+
+    const entries = await fsp.readdir(folderPath, { withFileTypes: true });
+
+    let finalNote = "";
+    for (const entry of entries) {
+      if (!entry.isDirectory()) continue;
+
+      const note = await generateNote(`${folderPath}/${entry.name}`);
+      finalNote += `${entry.name}\n`;
+      finalNote += `${note}\n\n`;
+    }
+
+    await createNoteBot(finalNote, recId, tenantId);
+  } catch (err) {
+    console.error(`File validation failed for ${recId}`, err);
+  } finally {
+    // Awaited so cleanup finishes before this promise settles; a cleanup
+    // failure is logged instead of escaping as an unhandled rejection.
+    try {
+      await deleteFolder(folderPath);
+    } catch (err) {
+      console.error(`Failed to remove ${folderPath}`, err);
+    }
+  }
+}
diff --git a/src/note/note.service.ts b/src/note/note.service.ts
index f9afdf0..cb4296a 100644
--- a/src/note/note.service.ts
+++ b/src/note/note.service.ts
@@ -80,6 +80,29 @@ export async function createNote(
   return newNote.populate({ path: "createdBy", select: "pid name avatar" });
 }
 
+/**
+ * Creates a note on `resourceId` without a request user.
+ *
+ * @param createdBy Author id stored on the note. NOTE(review): the default
+ *   is presumably the bot user's id - confirm.
+ * @returns The created note document.
+ */
+export async function createNoteBot(
+  content: string,
+  resourceId: string,
+  tenantId: string,
+  createdBy = "6762acd606db9d07307a302d"
+) {
+  return noteModel.create({
+    tenantId: tenantId,
+    pid: generateId(),
+    resourceId: resourceId,
+    content: content,
+    createdAt: new Date(),
+    createdBy: createdBy,
+  });
+}
+
 export async function updateNote(
   input: CreateNoteInput,
   resourceId: string,
diff --git a/src/routes.ts b/src/routes.ts
index 558cd33..268f27d 100644
--- a/src/routes.ts
+++ b/src/routes.ts
@@ -19,6 +19,7 @@ import { alertRoutes } from "./alert/alert.route";
 import { analyticsRoutes } from "./analytics/analytics.routes";
 import { eventRoutes } from "./events/events.route";
 import { userConfigRoutes } from "./userConfig/userConfig.route";
+import { validateRoutes } from "./fileValidation/validate.route";
 
 export default async function routes(fastify: FastifyInstance) {
   fastify.addHook("preHandler", authHandler);
@@ -39,5 +40,6 @@ export default async function routes(fastify: FastifyInstance) {
   fastify.register(paymentRoutes, { prefix: "/payments" });
   fastify.register(alertRoutes, { prefix: "/alerts" });
   fastify.register(analyticsRoutes, { prefix: "/analytics" });
+  fastify.register(validateRoutes, { prefix: "/validate" });
   fastify.register(eventRoutes);
 }