|
1 | 1 | import { and, eq } from 'drizzle-orm' |
| 2 | +import { htmlToText } from 'html-to-text' |
2 | 3 | import { nanoid } from 'nanoid' |
3 | 4 | import { createLogger } from '@/lib/logs/console/logger' |
4 | 5 | import { hasProcessedMessage, markMessageAsProcessed } from '@/lib/redis' |
@@ -85,62 +86,16 @@ export interface OutlookWebhookPayload { |
85 | 86 | */ |
86 | 87 | function convertHtmlToPlainText(html: string): string { |
87 | 88 | if (!html) return '' |
88 | | - |
89 | | - let working = html |
90 | | - |
91 | | - // Remove script and style content |
92 | | - working = working.replace(/<script[\s\S]*?>[\s\S]*?<\/script>/gi, '') |
93 | | - working = working.replace(/<style[\s\S]*?>[\s\S]*?<\/style>/gi, '') |
94 | | - |
95 | | - // Line breaks for common block-level tags |
96 | | - working = working |
97 | | - .replace(/<br\s*\/?>(?=\s|$)/gi, '\n') |
98 | | - .replace(/<\/(p|div|li|h[1-6]|tr)>/gi, '\n') |
99 | | - .replace(/<(p|div|li|h[1-6]|tr)[^>]*>/gi, '') |
100 | | - .replace(/<\/(td|th)>/gi, '\t') |
101 | | - |
102 | | - // Remove all remaining tags |
103 | | - working = working.replace(/<[^>]+>/g, '') |
104 | | - |
105 | | - // Decode common HTML entities |
106 | | - const entityMap: Record<string, string> = { |
107 | | - ' ': ' ', |
108 | | - '&': '&', |
109 | | - '<': '<', |
110 | | - '>': '>', |
111 | | - '"': '"', |
112 | | - ''': "'", |
113 | | - } |
114 | | - for (const [entity, char] of Object.entries(entityMap)) { |
115 | | - working = working.split(entity).join(char) |
116 | | - } |
117 | | - // Numeric entities (decimal) |
118 | | - working = working.replace(/&#(\d+);/g, (_, dec: string) => { |
119 | | - const code = Number(dec) |
120 | | - return Number.isFinite(code) ? String.fromCharCode(code) : _ |
| 89 | + return htmlToText(html, { |
| 90 | + wordwrap: false, |
| 91 | + selectors: [ |
| 92 | + { selector: 'a', options: { hideLinkHrefIfSameAsText: true, noAnchorUrl: true } }, |
| 93 | + { selector: 'img', format: 'skip' }, |
| 94 | + { selector: 'script', format: 'skip' }, |
| 95 | + { selector: 'style', format: 'skip' }, |
| 96 | + ], |
| 97 | + preserveNewlines: true, |
121 | 98 | }) |
122 | | - // Numeric entities (hex) |
123 | | - working = working.replace(/&#x([0-9a-fA-F]+);/g, (_, hex: string) => { |
124 | | - const code = Number.parseInt(hex, 16) |
125 | | - return Number.isFinite(code) ? String.fromCharCode(code) : _ |
126 | | - }) |
127 | | - |
128 | | - // Normalize whitespace |
129 | | - working = working |
130 | | - .replace(/\r\n/g, '\n') |
131 | | - .replace(/\r/g, '\n') |
132 | | - .replace(/\t/g, ' ') |
133 | | - .replace(/\u00A0/g, ' ') |
134 | | - |
135 | | - // Collapse excessive blank lines and trim |
136 | | - working = working |
137 | | - .split('\n') |
138 | | - .map((line) => line.trimEnd()) |
139 | | - .join('\n') |
140 | | - .replace(/\n{3,}/g, '\n\n') |
141 | | - .trim() |
142 | | - |
143 | | - return working |
144 | 99 | } |
145 | 100 |
|
146 | 101 | export async function pollOutlookWebhooks() { |
|
0 commit comments