-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathgetNotionPageContent.ts
201 lines (180 loc) · 6.46 KB
/
getNotionPageContent.ts
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
import { ActionDefinition, ActionContext, OutputObject } from 'connery';
import { Client, iteratePaginatedAPI, isFullBlock } from '@notionhq/client';
/**
 * Connery action definition for `getNotionPageContent`.
 *
 * Declares the action's key, display name, description, the three input
 * parameters (`notionPageUrl`, `notionApiKey`, `instructions`), the single
 * output parameter (`notionPageContent`) and wires the `handler` function
 * declared further down in this file as the operation to run.
 */
const actionDefinition: ActionDefinition = {
key: 'getNotionPageContent',
name: 'Get Notion Page Content',
// NOTE(review): the description below contains typos ("required" -> "requires",
// "form" -> "from"). It is runtime text, so it is left untouched here.
description:
'This action retrieves the content of a Notion page using its URL and the Notion API. It can optionally include instructions before the page content. The action required the Notion page URL and Notion API key connected to this URL. It fetches all content elements including text, media, and toggles, and returns the page content as a single string. It does not extract content form inline DBs.',
type: 'read',
inputParameters: [
{
// URL from which the handler extracts the page ID.
key: 'notionPageUrl',
name: 'Notion Page URL',
description: 'The URL of the private Notion page to fetch content from.',
type: 'string',
validation: {
required: true,
},
},
{
// Passed by the handler as the `auth` option of the Notion client.
key: 'notionApiKey',
name: 'Notion API Key',
description: 'API key to authenticate with the Notion API',
type: 'string',
validation: {
required: true,
},
},
{
// Optional: when provided, the handler prefixes the output with these instructions.
key: 'instructions',
name: 'Instructions',
description: 'Optional instructions for content processing.',
type: 'string',
validation: {
required: false,
},
},
],
operation: {
// `handler` is a function declaration later in this file (hoisted, so usable here).
handler: handler,
},
outputParameters: [
{
// The page content string, optionally preceded by the instructions.
key: 'notionPageContent',
name: 'Notion Page Content',
type: 'string',
validation: {
required: true,
},
},
],
};
export default actionDefinition;
/**
 * Entry point of the action: downloads every block of the Notion page behind
 * `input.notionPageUrl` and returns them as one newline-separated string.
 *
 * @param context - Action context; only `input` is used
 *   (`notionPageUrl`, `notionApiKey` and the optional `instructions`).
 * @returns An object whose `notionPageContent` holds the page text, prefixed
 *   with the instructions when they were supplied.
 * @throws Error when the URL contains no page ID or when the extracted
 *   content is shorter than 5 characters.
 */
export async function handler({ input }: ActionContext): Promise<OutputObject> {
  const { notionPageUrl, notionApiKey, instructions } = input;

  // Resolve the page ID first so an invalid URL fails before any client is created.
  const pageId = extractPageIdFromUrl(notionPageUrl);
  const client = new Client({ auth: notionApiKey });

  // Fetch the page's blocks (children included) and render one line per block.
  const allBlocks = await retrieveBlockChildren(client, pageId);
  const content = allBlocks.map((block) => getTextFromBlock(block)).join('\n');

  // Guard against pages that yielded (almost) nothing.
  if (content.length < 5) {
    throw new Error(
      `The extracted content is too short: ${content.length} characters. It must be at least 5 characters long.`,
    );
  }

  // Prepend the caller's instructions only when some were given.
  const notionPageContent = instructions
    ? `Follow these instructions: ${instructions}\nContent: ${content}`
    : content;

  return { notionPageContent };
}
/**
 * Collects every block nested under `id`, following pagination.
 *
 * Blocks are returned flattened in depth-first order: each block is pushed
 * first and, when it reports `has_children`, is immediately followed by all
 * of its descendants (fetched recursively, one request chain at a time).
 *
 * @param notion - Notion client used to list block children.
 * @param id - ID of the page or block whose children are listed.
 * @returns A flat array containing the blocks and their descendants.
 */
async function retrieveBlockChildren(notion: Client, id: string) {
  const collected: Array<any> = [];
  const pages = iteratePaginatedAPI(notion.blocks.children.list, { block_id: id });

  for await (const entry of pages) {
    collected.push(entry);

    // Only full blocks expose `has_children`; anything else has nothing to descend into.
    if (!isFullBlock(entry) || !entry.has_children) {
      continue;
    }

    // Append the descendants right after their parent.
    const descendants = await retrieveBlockChildren(notion, entry.id);
    for (const descendant of descendants) {
      collected.push(descendant);
    }
  }

  return collected;
}
/**
 * Flattens a Notion rich-text array into a single string by concatenating
 * the `plain_text` of every item, in order and without any separator.
 *
 * @param richText - Array of rich-text items.
 * @returns The concatenated plain text (empty string for an empty array).
 */
const getPlainTextFromRichText = (richText: any) => {
  const fragments = richText.map(({ plain_text }: any) => plain_text);
  return fragments.join('');
};
/**
 * Converts one Notion block into a single-line `"<type>: <text>"` string.
 *
 * Blocks whose payload carries `rich_text` are rendered from that text. All
 * other block types are handled case by case: media blocks go through
 * `getMediaSourceText`, table rows are rendered as their cells joined with
 * `" | "`, and purely structural blocks get a fixed placeholder. Unknown
 * types fall back to `[Needs case added]`.
 *
 * When the block reports `has_children`, `" (Has children)"` is appended.
 *
 * @param block - A block object as returned by the Notion API.
 * @returns The string representation of the block.
 */
const getTextFromBlock = (block: any) => {
  let text;
  if (block[block.type]?.rich_text) {
    text = getPlainTextFromRichText(block[block.type].rich_text);
  } else {
    switch (block.type) {
      case 'unsupported':
        text = '[Unsupported block type]';
        break;
      case 'bookmark':
        text = block.bookmark.url;
        break;
      case 'child_database':
        text = block.child_database.title;
        break;
      case 'child_page':
        text = block.child_page.title;
        break;
      case 'embed':
      case 'video':
      case 'audio':
      case 'file':
      case 'image':
      case 'pdf':
        text = getMediaSourceText(block);
        break;
      case 'equation':
        text = block.equation.expression;
        break;
      case 'link_preview':
        text = block.link_preview.url;
        break;
      case 'link_to_page':
        // The payload is `{ type: 'page_id' | 'database_id' | ..., [type]: id }`.
        text = block.link_to_page[block.link_to_page.type];
        break;
      case 'synced_block':
        text = block.synced_block.synced_from
          ? 'This block is synced with a block with the following ID: ' +
            block.synced_block.synced_from[block.synced_block.synced_from.type]
          : 'Source sync block that another block is synced with.';
        break;
      case 'table':
        text = 'Table width: ' + block.table.table_width;
        break;
      case 'table_row':
        // Each cell is its own rich-text array; without this case the table
        // content would be lost to the default placeholder.
        text = (block.table_row.cells ?? [])
          .map((cell: any) => getPlainTextFromRichText(cell))
          .join(' | ');
        break;
      case 'table_of_contents':
        text = 'ToC color: ' + block.table_of_contents.color;
        break;
      case 'breadcrumb':
      case 'column_list':
      case 'column':
      case 'divider':
        // Structural blocks: they carry no text of their own.
        text = 'No text available';
        break;
      default:
        text = '[Needs case added]';
        break;
    }
  }
  if (block.has_children) {
    text = text + ' (Has children)';
  }
  return block.type + ': ' + text;
};
/**
 * Builds the text for a media block (image, video, file, pdf, embed, ...).
 *
 * The source is taken from the first of `external.url`, `file.url` or `url`
 * that exists on the block's payload; if none exists a placeholder naming the
 * block type is used. When the payload has a non-empty caption, the result is
 * `"<caption>: <source>"`, otherwise just the source.
 *
 * A missing payload or missing caption is tolerated instead of throwing, so
 * one incomplete block cannot abort the extraction of the whole page.
 *
 * @param block - A media block as returned by the Notion API.
 * @returns The source, optionally prefixed with the caption.
 */
const getMediaSourceText = (block: any) => {
  // Fall back to an empty payload so the checks below degrade to the placeholder.
  const media = block[block.type] ?? {};

  let source;
  if (media.external) {
    source = media.external.url;
  } else if (media.file) {
    source = media.file.url;
  } else if (media.url) {
    source = media.url;
  } else {
    source = '[Missing case for media block types]: ' + block.type;
  }

  // `caption` may be absent; only a non-empty caption is prepended.
  if (media.caption?.length) {
    const caption = getPlainTextFromRichText(media.caption);
    return caption + ': ' + source;
  }
  return source;
};
/**
 * Pulls the Notion page ID out of a URL.
 *
 * The first occurrence of either 32 consecutive lowercase hex characters or a
 * dashed 8-4-4-4-12 lowercase hex UUID is taken as the ID.
 *
 * @param url - URL of the Notion page.
 * @returns The page ID with all dashes removed.
 * @throws Error with message `Invalid Notion page URL` when no ID is found.
 */
function extractPageIdFromUrl(url: string): string {
  const pageIdPattern = /([a-f0-9]{32})|([a-f0-9]{8}-[a-f0-9]{4}-[a-f0-9]{4}-[a-f0-9]{4}-[a-f0-9]{12})/;
  const found = url.match(pageIdPattern);

  if (found === null) {
    throw new Error('Invalid Notion page URL');
  }

  // Normalise the dashed UUID form to the compact 32-character form.
  const [rawId] = found;
  return rawId.split('-').join('');
}