Skip to content

Commit a4d274d

Browse files
Adam Gough
authored and committed
added html func
1 parent cddb39a commit a4d274d

3 files changed

Lines changed: 16 additions & 55 deletions

File tree

apps/sim/lib/webhooks/outlook-polling-service.ts

Lines changed: 10 additions & 55 deletions
Original file line number | Diff line number | Diff line change
@@ -1,4 +1,5 @@
11
import { and, eq } from 'drizzle-orm'
2+
import { htmlToText } from 'html-to-text'
23
import { nanoid } from 'nanoid'
34
import { createLogger } from '@/lib/logs/console/logger'
45
import { hasProcessedMessage, markMessageAsProcessed } from '@/lib/redis'
@@ -85,62 +86,16 @@ export interface OutlookWebhookPayload {
8586
*/
8687
function convertHtmlToPlainText(html: string): string {
8788
if (!html) return ''
88-
89-
let working = html
90-
91-
// Remove script and style content
92-
working = working.replace(/<script[\s\S]*?>[\s\S]*?<\/script>/gi, '')
93-
working = working.replace(/<style[\s\S]*?>[\s\S]*?<\/style>/gi, '')
94-
95-
// Line breaks for common block-level tags
96-
working = working
97-
.replace(/<br\s*\/?>(?=\s|$)/gi, '\n')
98-
.replace(/<\/(p|div|li|h[1-6]|tr)>/gi, '\n')
99-
.replace(/<(p|div|li|h[1-6]|tr)[^>]*>/gi, '')
100-
.replace(/<\/(td|th)>/gi, '\t')
101-
102-
// Remove all remaining tags
103-
working = working.replace(/<[^>]+>/g, '')
104-
105-
// Decode common HTML entities
106-
const entityMap: Record<string, string> = {
107-
'&nbsp;': ' ',
108-
'&amp;': '&',
109-
'&lt;': '<',
110-
'&gt;': '>',
111-
'&quot;': '"',
112-
'&#39;': "'",
113-
}
114-
for (const [entity, char] of Object.entries(entityMap)) {
115-
working = working.split(entity).join(char)
116-
}
117-
// Numeric entities (decimal)
118-
working = working.replace(/&#(\d+);/g, (_, dec: string) => {
119-
const code = Number(dec)
120-
return Number.isFinite(code) ? String.fromCharCode(code) : _
89+
return htmlToText(html, {
90+
wordwrap: false,
91+
selectors: [
92+
{ selector: 'a', options: { hideLinkHrefIfSameAsText: true, noAnchorUrl: true } },
93+
{ selector: 'img', format: 'skip' },
94+
{ selector: 'script', format: 'skip' },
95+
{ selector: 'style', format: 'skip' },
96+
],
97+
preserveNewlines: true,
12198
})
122-
// Numeric entities (hex)
123-
working = working.replace(/&#x([0-9a-fA-F]+);/g, (_, hex: string) => {
124-
const code = Number.parseInt(hex, 16)
125-
return Number.isFinite(code) ? String.fromCharCode(code) : _
126-
})
127-
128-
// Normalize whitespace
129-
working = working
130-
.replace(/\r\n/g, '\n')
131-
.replace(/\r/g, '\n')
132-
.replace(/\t/g, ' ')
133-
.replace(/\u00A0/g, ' ')
134-
135-
// Collapse excessive blank lines and trim
136-
working = working
137-
.split('\n')
138-
.map((line) => line.trimEnd())
139-
.join('\n')
140-
.replace(/\n{3,}/g, '\n\n')
141-
.trim()
142-
143-
return working
14499
}
145100

146101
export async function pollOutlookWebhooks() {

apps/sim/package.json

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -89,6 +89,7 @@
8989
"fuse.js": "7.1.0",
9090
"geist": "1.4.2",
9191
"groq-sdk": "^0.15.0",
92+
"html-to-text": "^9.0.5",
9293
"input-otp": "^1.4.2",
9394
"ioredis": "^5.6.0",
9495
"jose": "6.0.11",
@@ -133,6 +134,7 @@
133134
"@testing-library/react": "^16.3.0",
134135
"@testing-library/user-event": "^14.6.1",
135136
"@trigger.dev/build": "4.0.0",
137+
"@types/html-to-text": "^9.0.4",
136138
"@types/js-yaml": "4.0.9",
137139
"@types/jsdom": "21.1.7",
138140
"@types/lodash": "^4.17.16",

bun.lock

Lines changed: 4 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

0 commit comments

Comments
 (0)