diff --git a/README.md b/README.md
index e36676d..f6482c4 100644
--- a/README.md
+++ b/README.md
@@ -25,6 +25,13 @@ Blocks listed under the Outline page can be of the following types:
Docu-notion automatically identifies and removes blocks that are either child pages or links to pages located at the root level of the page. If you need to include such blocks within your content, they must be embedded within another block type, like a table or a column, or they should be accompanied by some text within the same block to trick this logic.
+# **Custom Pages**
+
+Docusaurus automatically generates standalone pages from the files in the `src/pages` directory, creating the corresponding slugs and links. Docu-notion treats pages located at the root but outside the 'Outline' as custom pages: they are converted to markdown and moved to `src/pages`. This setup supports both standard pages and links to database pages.
+
+**Note on Conflicts**: If the 'Outline' contains content, an `index.md` is generated. However, if there is also an `index.js` in `src/pages`, the two files conflict and Docusaurus prioritizes the last processed page. Testing indicates that `src/pages` takes precedence over pages in the `docs` folder, so the generated `index.md` will not be taken into account.
+
+
# Custom parsing (Plugins)
Custom parsing logic can be created using plugins. See the [plugin readme](src/plugins/README.md).
diff --git a/package.json b/package.json
index f56e683..480a4fc 100644
--- a/package.json
+++ b/package.json
@@ -1,5 +1,6 @@
{
"scripts": {
+ "sync": "rsync -av --delete docs/ ../docs.kira.network/docs/",
"test": "vitest",
"build": "npm run test -- --run && tsc && cp ./src/css/*.css dist/ && echo Build successful",
"build-only": "tsc && cp ./src/css/*.css dist/",
@@ -11,6 +12,7 @@
"// typescript check": "",
"tsc": "tsc",
"// test out with a private sample notion db": "",
+ "pull": "npm run ts -- -n $DOCU_NOTION_INTEGRATION_TOKEN -r $DOCU_NOTION_SAMPLE_ROOT_PAGE --log-level verbose",
"large-site-test": "npm run ts -- -n $SIL_BLOOM_DOCS_NOTION_TOKEN -r $SIL_BLOOM_DOCS_NOTION_ROOT_PAGE --locales en,fr --log-level debug",
"pull-test-tagged": "npm run ts -- -n $DOCU_NOTION_INTEGRATION_TOKEN -r $DOCU_NOTION_TEST_ROOT_PAGE_ID --log-level debug --status-tag test",
"pull-test-css": "npm run ts -- --css-output-directory ./test/css -n $DOCU_NOTION_INTEGRATION_TOKEN -r $DOCU_NOTION_TEST_ROOT_PAGE_ID --log-level debug --status-tag test",
diff --git a/src/HierarchicalNamedLayoutStrategy.ts b/src/HierarchicalNamedLayoutStrategy.ts
index dbbbd17..815aeff 100644
--- a/src/HierarchicalNamedLayoutStrategy.ts
+++ b/src/HierarchicalNamedLayoutStrategy.ts
@@ -1,7 +1,8 @@
import * as fs from "fs-extra";
import sanitize from "sanitize-filename";
import { LayoutStrategy } from "./LayoutStrategy";
-import { NotionPage } from "./NotionPage";
+import { NotionPage, PageSubType } from "./NotionPage";
+import { warning } from "./log";
// This strategy gives us a file tree that mirrors that of notion.
// Each level in the outline becomes a directory, and each file bears the name of the Notion document.
@@ -36,13 +37,20 @@ export class HierarchicalNamedLayoutStrategy extends LayoutStrategy {
.replaceAll("'", "")
.replaceAll("?", "-");
+ let path;
+ if (page.subtype === PageSubType.Custom) {
+ // For Custom pages, write them to a 'tmp' folder under the root directory; they are moved to src/pages once the pull has finished
+ path = this.rootDirectory +`/tmp/${sanitizedName}${extensionWithDot}`;
+ } else {
+
+ // For all other pages, use the existing structure for Docusaurus to parse
const context = ("/" + page.layoutContext + "/").replaceAll("//", "/");
- const path =
- this.rootDirectory + context + sanitizedName + extensionWithDot;
-
- return path;
+ path = this.rootDirectory + context + sanitizedName + extensionWithDot;
}
+ return path;
+}
+
//{
// "position": 2.5,
// "label": "Tutorial",
diff --git a/src/NotionPage.ts b/src/NotionPage.ts
index 7164f0b..4a1b879 100644
--- a/src/NotionPage.ts
+++ b/src/NotionPage.ts
@@ -9,13 +9,18 @@ import { ListBlockChildrenResponseResults } from "notion-to-md/build/types";
// create pages for each node of the outline and then add links from those to the database pages. In this way, we get the benefits of database
// pages (metadata, workflow, etc) and also normal pages (order, position in the outline).
export enum PageType {
- CategoryIndex,
DatabasePage,
Simple,
}
+export enum PageSubType { // finer classification read from the `subtype` flag on a page's parent metadata
+ CategoryIndex, // flag 'categoryindex': level page that has content of its own; its file is named "index"
+ Custom, // flag 'custom': page directly under the root page but outside the Outline
+ Content, // any other value, including no flag at all
+}
export class NotionPage {
public metadata: GetPageResponse;
+ public parentId: string;
public pageId: string;
public order: number;
public layoutContext: string; // where we found it in the hierarchy of the outline
@@ -23,12 +28,14 @@ export class NotionPage {
public constructor(args: {
layoutContext: string;
+ parentId: string;
pageId: string;
order: number;
metadata: GetPageResponse;
foundDirectlyInOutline: boolean;
}) {
this.layoutContext = args.layoutContext;
+ this.parentId = args.parentId
this.pageId = args.pageId;
this.order = args.order;
this.metadata = args.metadata;
@@ -59,23 +66,29 @@ export class NotionPage {
{
"object": "page",
"parent": {
- ("isCategory": "true")
"type": "page_id",
or
"type": "database_id",
...
},
*/
-
- // Check IsCategory flag under parent for level pages with index content
- if ((this.metadata as any).parent.IsCategory) {
- return PageType.CategoryIndex;
- }
return (this.metadata as any).parent.type === "database_id"
? PageType.DatabasePage
: PageType.Simple;
}
+  /** Sub-classification taken from the `subtype` flag on the parent metadata: 'custom' and 'categoryindex' map to their enum members, anything else (or no flag) is Content. */
+  public get subtype(): PageSubType {
+    switch ((this.metadata as any).parent?.subtype) {
+      case 'custom':
+        return PageSubType.Custom;
+      case 'categoryindex':
+        return PageSubType.CategoryIndex;
+      default:
+        return PageSubType.Content;
+    }
+  }
+
// In Notion, pages from the Database have names and simple pages have titles.
public get nameOrTitle(): string {
return this.type === PageType.DatabasePage ? this.name : this.title;
@@ -83,7 +96,7 @@ export class NotionPage {
public nameForFile(): string {
// In Notion, pages from the Database have names and simple pages have titles. We use "index" by default for Level page with content.
- if (this.type === PageType.CategoryIndex) {
+ if (this.subtype === PageSubType.CategoryIndex) {
return "index";
}
return this.type === PageType.Simple
diff --git a/src/plugins/ColumnListTransformer.ts b/src/plugins/ColumnListTransformer.ts
index 0d0a483..05794ea 100644
--- a/src/plugins/ColumnListTransformer.ts
+++ b/src/plugins/ColumnListTransformer.ts
@@ -30,9 +30,9 @@ async function notionColumnListToTabs(
async child => await notionToMarkdown.blockToMarkdown(child)
)
);
- const content = markdownContent.join("\n");
+ const content = markdownContent.join("\n\n");
- return `\n${content}\n`;
+ return `\n\n${content}\n\n`;
});
const tabItems = await Promise.all(tabItemsPromises);
diff --git a/src/plugins/internalLinks.spec.ts b/src/plugins/internalLinks.spec.ts
index a698c93..1bc0d91 100644
--- a/src/plugins/internalLinks.spec.ts
+++ b/src/plugins/internalLinks.spec.ts
@@ -242,24 +242,61 @@ test("raw link to an existing page on this site that has a slug", async () => {
});
const results = await getMarkdown(
- {
- object: "block",
- id: "2051d790-e527-4b4e-b145-ec0beee2addf",
- parent: {
- type: "page_id",
- page_id: "333",
- },
- created_time: "2023-06-14T20:09:00.000Z",
- last_edited_time: "2023-06-14T20:09:00.000Z",
- has_children: false,
- archived: false,
- // TODO: mention has replaced link_to_page
- type: "link_to_page",
- link_to_page: {
- type: "page_id",
- page_id: targetPageId,
- },
+{
+ "object": "block",
+ "id": "2051d790-e527-4b4e-b145-ec0beee2addf",
+ "parent": {
+ "type": "page_id",
+ "page_id": "333"
},
+ "created_time": "2023-06-14T20:09:00.000Z",
+ "last_edited_time": "2023-06-14T20:09:00.000Z",
+ "has_children": false,
+ "archived": false,
+ "type": "paragraph",
+ "paragraph": {
+ "rich_text": [
+ {
+ "type": "mention",
+ "mention": {
+ "type": "page",
+ "page": {
+ "id": targetPageId
+ }
+ },
+ "annotations": {
+ "bold": false,
+ "italic": false,
+ "strikethrough": false,
+ "underline": false,
+ "code": false,
+ "color": "default"
+ },
+ "plain_text": "Link text",
+ "href": "https://www.notion.so/123"
+ },
+ {
+ "type": "text",
+ "text": {
+ "content": " ",
+ "link": null
+ },
+ "annotations": {
+ "bold": false,
+ "italic": false,
+ "strikethrough": false,
+ "underline": false,
+ "code": false,
+ "color": "default"
+ },
+ "plain_text": " ",
+ "href": null
+ }
+ ],
+ "color": "default"
+ }
+}
+,
targetPage
);
expect(results.trim()).toBe("[Point to Me](/point-to-me)");
diff --git a/src/plugins/pluginTestRun.ts b/src/plugins/pluginTestRun.ts
index 2b47083..b84dd2a 100644
--- a/src/plugins/pluginTestRun.ts
+++ b/src/plugins/pluginTestRun.ts
@@ -223,6 +223,7 @@ export function makeSamplePageObject(options: {
const p = new NotionPage({
layoutContext: "/Second-Level/Third-Level",
+ parentId: "d20d8391-b365-42cb-8821-cf3c5382c6ed",
pageId: id,
order: 0,
metadata: m,
diff --git a/src/pull.ts b/src/pull.ts
index 595aa8d..2b59b6e 100644
--- a/src/pull.ts
+++ b/src/pull.ts
@@ -71,14 +71,18 @@ export async function notionPull(options: DocuNotionOptions): Promise {
layoutStrategy = new HierarchicalNamedLayoutStrategy();
+ // Create output folder
await fs.mkdir(options.markdownOutputPath, { recursive: true });
layoutStrategy.setRootDirectoryForMarkdown(
options.markdownOutputPath.replace(/\/+$/, "") // trim any trailing slash
);
+ // Create a 'tmp' folder for custom pages
+ await fs.mkdir(options.markdownOutputPath.replace(/\/+$/, "") + '/tmp', { recursive: true });
+
info("Connecting to Notion...");
- // Do a quick test to see if we can connect to the root so that we can give a better error than just a generic "could not find page" one.
+ // Do a quick test to see if we can connect to the root so that we can give a better error than just a generic "could not find page" one.
try {
await executeWithRateLimitAndRetries("retrieving root page", async () => {
await notionClient.pages.retrieve({ page_id: options.rootPage });
@@ -99,7 +103,7 @@ export async function notionPull(options: DocuNotionOptions): Promise {
group(
"Stage 1: walk children of the page named 'Outline', looking for pages..."
);
- await getPagesRecursively(options, "", options.rootPage, 0, true);
+ await getPagesRecursively(options, "", options.rootPage, options.rootPage, 0, true);
logDebug("getPagesRecursively", JSON.stringify(pages, null, 2));
info(`Found ${pages.length} pages`);
endGroup();
@@ -172,54 +176,112 @@ async function outputPages(
async function getPagesRecursively(
options: DocuNotionOptions,
incomingContext: string,
- pageIdOfThisParent: string,
- orderOfThisParent: number,
+ parentId: string,
+ pageId: string,
+ pageOrder: number,
rootLevel: boolean
) {
- const pageInTheOutline = await fromPageId(
+ const currentPage = await fromPageId(
+ options,
incomingContext,
- pageIdOfThisParent,
- orderOfThisParent,
- true
+ parentId,
+ pageId,
+ pageOrder,
+ true,
+ false
);
info(
- `Looking for children and links from ${incomingContext}/${pageInTheOutline.nameOrTitle}`
+ `Looking for children and links from ${incomingContext}/${currentPage.nameOrTitle}`
);
- const r = await getBlockChildren(pageInTheOutline.pageId);
- const pageInfo = await pageInTheOutline.getContentInfo(r);
- verbose(`Childs:${pageInfo.childPageIdsAndOrder.length}`);
- verbose(`Links:${pageInfo.linksPageIdsAndOrder.length}`);
- verbose(`hasContent:${pageInfo.hasContent}`);
+ const r = await getBlockChildren(currentPage.pageId);
+ const pageInfo = await currentPage.getContentInfo(r);
+ // TODO: delete
+ // verbose(`RootLevel:${rootLevel}`);
+ // verbose(`ParentID:${parentId}`);
+ // verbose(`PageID:${pageId}`);
+ // verbose(`Childs:${pageInfo.childPageIdsAndOrder.length}`);
+ // verbose(`Links:${pageInfo.linksPageIdsAndOrder.length}`);
+ // verbose(`hasContent:${pageInfo.hasContent}`);
+
+ // case: root page
if (
+ currentPage.pageId == parentId
+ ){
+ warning(`Scan: Root page is "${currentPage.nameOrTitle}". Scanning...`);
+ let layoutContext = incomingContext;
+
+ // Recursively process each child page...
+ for (const childPageInfo of pageInfo.childPageIdsAndOrder) {
+ await getPagesRecursively(
+ options,
+ layoutContext,
+ currentPage.pageId,
+ childPageInfo.id,
+ childPageInfo.order,
+ false
+ );
+ }
+ // ... and links to page.
+ for (const linkPageInfo of pageInfo.linksPageIdsAndOrder) {
+ pages.push(
+ await fromPageId(
+ options,
+ layoutContext,
+ currentPage.pageId,
+ linkPageInfo.id,
+ linkPageInfo.order,
+ false,
+ true
+ )
+ );
+ }
+ }
+
+ // case: custom page contained in the root page to be moved into Docusaurus src/pages folder, except the Outline.
+ else if (
+ currentPage.nameOrTitle != "Outline" &&
+ currentPage.parentId == options.rootPage &&
+ currentPage.pageId != options.rootPage
+ // pageInfo.hasContent
+ ){
+ warning(`Scan: Page "${currentPage.nameOrTitle}" is outside the Outline, it will be stored in "src/pages" to be used as your convenience.`);
+ // Set subtype flag
+ (currentPage.metadata as any).parent.subtype = "custom";
+ pages.push(currentPage);
+ }
+
+ // case: Category page with an index
+ else if (
!rootLevel &&
pageInfo.hasContent &&
(pageInfo.childPageIdsAndOrder.length > 0 || pageInfo.linksPageIdsAndOrder.length > 0)
){
- warning(`Note: The page "${pageInTheOutline.nameOrTitle}" contains both childrens and content so it should produce a level with an index page`);
+ warning(`Scan: Page "${currentPage.nameOrTitle}" contains both childrens and content so it should produce a level with an index page.`);
- // set IsCategory flag
- (pageInTheOutline.metadata as any).parent.IsCategory = true;
+ // Set subtype flag
+ (currentPage.metadata as any).parent.subtype = "categoryindex";
// Add a new level for this page
let layoutContext = layoutStrategy.newLevel(
options.markdownOutputPath,
- pageInTheOutline.order,
+ currentPage.order,
incomingContext,
- pageInTheOutline.nameOrTitle
+ currentPage.nameOrTitle
);
// Forward level for index.md and push it into the pages array
- pageInTheOutline.layoutContext = layoutContext;
- pages.push(pageInTheOutline);
+ currentPage.layoutContext = layoutContext;
+ pages.push(currentPage);
// Recursively process each child page and page link
for (const childPageInfo of pageInfo.childPageIdsAndOrder) {
await getPagesRecursively(
options,
layoutContext,
+ currentPage.pageId,
childPageInfo.id,
childPageInfo.order,
false
@@ -228,41 +290,44 @@ async function getPagesRecursively(
for (const linkPageInfo of pageInfo.linksPageIdsAndOrder) {
pages.push(
await fromPageId(
+ options,
layoutContext,
+ currentPage.pageId,
linkPageInfo.id,
linkPageInfo.order,
- false
+ false,
+ true
)
);
}
}
-
- // Simple content page are being pushed
+ // case: a simple content page
else if (!rootLevel && pageInfo.hasContent) {
- warning(`Note: The page "${pageInTheOutline.nameOrTitle}" is a simple content page.`);
- pages.push(pageInTheOutline);
+ warning(`Scan: Page "${currentPage.nameOrTitle}" is a simple content page.`);
+ pages.push(currentPage);
}
- // a normal outline page that exists just to create the level, pointing at database pages that belong in this level
+ // case: A category page without index that exists just to create the level in the sidebar
else if (
pageInfo.childPageIdsAndOrder.length ||
pageInfo.linksPageIdsAndOrder.length
) {
- warning(`Note: The page "${pageInTheOutline.nameOrTitle}" only has child pages or links to page; it's a level without index.`);
+ warning(`Scan: Page "${currentPage.nameOrTitle}" only has child pages or links to page; it's a level without index.`);
let layoutContext = incomingContext;
// don't make a level for "Outline" page at the root
- if (!rootLevel && pageInTheOutline.nameOrTitle !== "Outline") {
+ if (!rootLevel && currentPage.nameOrTitle !== "Outline") {
layoutContext = layoutStrategy.newLevel(
options.markdownOutputPath,
- pageInTheOutline.order,
+ currentPage.order,
incomingContext,
- pageInTheOutline.nameOrTitle
+ currentPage.nameOrTitle
);
}
for (const childPageInfo of pageInfo.childPageIdsAndOrder) {
await getPagesRecursively(
options,
layoutContext,
+ currentPage.pageId,
childPageInfo.id,
childPageInfo.order,
false
@@ -272,17 +337,23 @@ async function getPagesRecursively(
for (const linkPageInfo of pageInfo.linksPageIdsAndOrder) {
pages.push(
await fromPageId(
+ options,
layoutContext,
+ currentPage.pageId,
linkPageInfo.id,
linkPageInfo.order,
- false
+ false,
+ true
)
);
}
- } else {
+ }
+
+ // case: empty pages and undefined ones
+ else {
console.info(
warning(
- `Warning: The page "${pageInTheOutline.nameOrTitle}" is in the outline but appears to not have content, links to other pages, or child pages. It will be skipped.`
+ `Warning: The page "${currentPage.nameOrTitle}" is in the outline but appears to not have content, links to other pages, or child pages. It will be skipped.`
)
);
++counts.skipped_because_empty;
@@ -403,21 +474,38 @@ export function initNotionClient(notionToken: string): Client {
return notionClient;
}
async function fromPageId(
+ options: DocuNotionOptions,
context: string,
+ parentId: string,
pageId: string,
order: number,
- foundDirectlyInOutline: boolean
+ foundDirectlyInOutline: boolean,
+ isLink: boolean
): Promise {
const metadata = await getPageMetadata(pageId);
-
- //logDebug("notion metadata", JSON.stringify(metadata));
- return new NotionPage({
+ let currentPage = new NotionPage({
layoutContext: context,
+ parentId,
pageId,
order,
metadata,
foundDirectlyInOutline,
});
+ if (isLink) {
+ if (
+ parentId == options.rootPage &&
+ pageId != options.rootPage &&
+ currentPage.nameOrTitle != "Outline"
+ ) {
+ (currentPage.metadata as any).parent.subtype = "custom";
+ warning(`Scan: Page "${currentPage.nameOrTitle}" is a link outside the Outline, it will be stored in "src/pages" to be used as your convenience.`);
+ } else {
+ warning(`Scan: Page "${currentPage.nameOrTitle}" is a link to a page.`);
+ }
+ }
+
+ //logDebug("notion metadata", JSON.stringify(metadata));
+ return currentPage
}
// This function is copied (and renamed from modifyNumberedListObject) from notion-to-md.
diff --git a/src/run.ts b/src/run.ts
index 4b05af4..6f10665 100644
--- a/src/run.ts
+++ b/src/run.ts
@@ -1,6 +1,6 @@
import * as fs from "fs-extra";
import { Option, program } from "commander";
-import { setLogLevel } from "./log";
+import { setLogLevel, warning } from "./log";
import { notionPull } from "./pull";
import path from "path";
@@ -61,7 +61,10 @@ export async function run(): Promise {
program.showHelpAfterError();
program.parse();
setLogLevel(program.opts().logLevel);
- console.log(JSON.stringify(program.opts()));
+
+ const options = program.opts();
+ const safeOptions = { ...options, notionToken: 'REDACTED' }; // Don't console log notion token for safety
+ console.log(JSON.stringify(safeOptions));
// copy in the this version of the css needed to make columns (and maybe other things?) work
let pathToCss = "";
@@ -80,11 +83,57 @@ export async function run(): Promise {
path.join(program.opts().cssOutputDirectory, "docu-notion-styles.css")
);
- // pull and convert
- await notionPull(program.opts()).then(() =>
- console.log("docu-notion Finished.")
- );
+ async function moveTmpContents() { // Moves every entry of <markdownOutputPath>/tmp into src/pages, asking before an existing file is replaced.
+ const destTmpPath = "src/pages"; // relative path, so it is resolved against the current working directory
+ const srcTmpPath = path.join(options.markdownOutputPath.replace(/\/+$/, "")+ '/tmp'); // trailing slashes are trimmed before '/tmp' is appended
+ warning(`dest:${destTmpPath}`)
+ warning(`src:${srcTmpPath}`)
+ fs.ensureDirSync(destTmpPath); // make sure the destination folder exists before anything is moved
+ // Entries are handled one at a time (the prompt is awaited inside the loop), so at most one question is pending.
+ const tmpFiles = fs.readdirSync(srcTmpPath);
+ for (const file of tmpFiles) {
+ const destFilePath = path.join(destTmpPath, file);
+ const srcFilePath = path.join(srcTmpPath, file);
+ // Only ask when the destination already has an entry with the same name.
+ if (fs.existsSync(destFilePath)) {
+ // Ask the user whether the existing file may be replaced
+ const overwrite = await promptUserForOverwrite(file);
+ if (!overwrite) {
+ console.log(`Skipping overwrite of '${file}'`);
+ continue; // the declined file is not moved and stays in the tmp folder
+ } else {
+ console.log(`Overwriting '${file}'`);
+ }
+ }
+ // Reached for new files and for files the user agreed to replace; overwrite: true covers the second case.
+ fs.moveSync(srcFilePath, destFilePath, { overwrite: true });
+ }
+ }
+
+ // pull and move custom pages
+ await notionPull(program.opts());
+ await moveTmpContents();
+ console.log("Pull from Notion successful. Custom pages were moved to src/pages.");
+
}
+
function parseLocales(value: string): string[] {
return value.split(",").map(l => l.trim().toLowerCase());
}
+
+// Prompt the user when a custom page already exists at the destination
+const readline = require('readline');
+
+/**
+ * Asks on the terminal whether an existing custom page may be overwritten.
+ * @param fileName name of the file that already exists in the destination folder
+ * @returns a promise resolving to true only when the answer is "y" (any case, surrounding whitespace ignored)
+ */
+function promptUserForOverwrite(fileName: string): Promise<boolean> {
+  return new Promise<boolean>((resolve) => {
+    const rl = readline.createInterface({ input: process.stdin, output: process.stdout });
+    rl.question(`The file '${fileName}' already exists in 'src/pages'. Do you want to overwrite it? (y/any) `, (answer: string) => {
+      rl.close(); // release stdin before the caller goes on to the next file
+      resolve(answer.trim().toLowerCase() === 'y');
+    });
+  });
+}
\ No newline at end of file
diff --git a/src/transform.ts b/src/transform.ts
index 749a3a6..939a958 100644
--- a/src/transform.ts
+++ b/src/transform.ts
@@ -44,14 +44,14 @@ export async function getMarkdownFromNotionBlocks(
// Level page index.md content filter : Keep the block if it is not a child page or only contains a mention (is a link to page)
// Note: this will filters EVERY page. We assume child_page and mention block to be used only for the purpose of creating a new page.
- // If you want to use links to other pages, you'll have to put a bit of text in the block.
+ // If you want to use links to other pages, you'll have to put a bit of text in the block.
const filteredBlocks = blocks.filter((block: any) => {
// Filter out 'child_page' type blocks
if (block.type === 'child_page') {
return false;
}
- // For paragraph blocks, check if they consist of a mention and an empty text node
+ // Filter out link to page blocks : check if they consist of a mention and an empty text node
if (block.type === 'paragraph' && block.paragraph.rich_text.length === 2) {
const [element1, element2] = block.paragraph.rich_text;
diff --git a/tsconfig.json b/tsconfig.json
index 2521f8b..b392910 100644
--- a/tsconfig.json
+++ b/tsconfig.json
@@ -69,9 +69,10 @@
"include": [
"src/**/*.ts",
"src/**/*.json",
- "test/**/*.ts"
+ // "test/**/*.ts"
],
"exclude": [
- "*.config.ts"
+ "*.config.ts",
+ "**/*.spec.ts"
]
}
\ No newline at end of file