From 991b57829a181f418e7c684a1d0c32041201cae1 Mon Sep 17 00:00:00 2001 From: hrbn <277654+hrbn@users.noreply.github.com> Date: Sun, 28 Jul 2024 21:43:15 -0400 Subject: [PATCH] M 100_years_of_solitude.ipynb M edges.csv M nodes.csv --- 100_years_of_solitude.ipynb | 462 +++++++++++++++++++++++------------- edges.csv | 281 +++++++++++----------- nodes.csv | 84 +++---- 3 files changed, 482 insertions(+), 345 deletions(-) diff --git a/100_years_of_solitude.ipynb b/100_years_of_solitude.ipynb index 1e7437f..9d28773 100644 --- a/100_years_of_solitude.ipynb +++ b/100_years_of_solitude.ipynb @@ -3,17 +3,24 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "dotnet_interactive": { + "language": "csharp" + }, + "polyglot_notebook": { + "kernelName": "csharp" + } + }, "outputs": [], "source": [ "# download the book from the Internet Archive\n", - "!wget https://archive.org/compress/OneHundredYearsOfSolitude_201710\n", - "!unzip -p OneHundredYearsOfSolitude_201710.zip One_Hundred_Years_of_Solitude_djvu.txt>One_Hundred_Years_of_Solitude_djvu.txt" + "# !wget -O OneHundredYearsOfSolitude_201710.zip https://archive.org/compress/OneHundredYearsOfSolitude_201710\n", + "# !unzip -p OneHundredYearsOfSolitude_201710.zip One_Hundred_Years_of_Solitude_djvu.txt>One_Hundred_Years_of_Solitude_djvu.txt" ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 1, "metadata": {}, "outputs": [], "source": [ @@ -77,7 +84,7 @@ }, { "cell_type": "code", - "execution_count": 2, + "execution_count": 4, "metadata": {}, "outputs": [ { @@ -85,19 +92,19 @@ "output_type": "stream", "text": [ " name count\n", - "0 Aureliano Buendia 194\n", + "0 Aureliano Buendia 193\n", "1 Aureliano Segundo 187\n", - "2 Jose Arcadio Buendia 167\n", + "2 Jose Arcadio Buendia 166\n", "3 Jose Arcadio 132\n", "4 Rebeca 95\n", ".. ... ...\n", "98 Remedios 1\n", "99 Coronel 1\n", "100 Ash 1\n", - "101 Mauser 1\n", - "139 Augustus 1\n", + "101 Reviewing Melquiades 1\n", + "138 Augustus 1\n", "\n", - "[140 rows x 2 columns]\n" + "[139 rows x 2 columns]\n" ] } ], @@ -135,7 +142,7 @@ }, { "cell_type": "code", - "execution_count": 3, + "execution_count": 5, "metadata": {}, "outputs": [ { @@ -167,7 +174,7 @@ " \n", " 0\n", " Aureliano Buendia\n", - " 194\n", + " 193\n", " \n", " \n", " 1\n", @@ -177,7 +184,7 @@ " \n", " 2\n", " Jose Arcadio Buendia\n", - " 167\n", + " 166\n", " \n", " \n", " 3\n", @@ -195,28 +202,28 @@ " ...\n", " \n", " \n", - " 62\n", + " 64\n", " Amaranta Buendia\n", " 2\n", " \n", " \n", - " 61\n", - " Aureliano Arcaya\n", + " 62\n", + " Ternera\n", " 2\n", " \n", " \n", - " 60\n", - " Jack Brown\n", + " 61\n", + " Aureliano Arcaya\n", " 2\n", " \n", " \n", - " 59\n", - " Alirio Noguera\n", + " 60\n", + " Solomon\n", " 2\n", " \n", " \n", " 63\n", - " Jose\n", + " Alirio Noguera\n", " 2\n", " \n", " \n", @@ -226,22 +233,22 @@ ], "text/plain": [ " name count\n", - "0 Aureliano Buendia 194\n", + "0 Aureliano Buendia 193\n", "1 Aureliano Segundo 187\n", - "2 Jose Arcadio Buendia 167\n", + "2 Jose Arcadio Buendia 166\n", "3 Jose Arcadio 132\n", "4 Rebeca 95\n", ".. ... ...\n", - "62 Amaranta Buendia 2\n", + "64 Amaranta Buendia 2\n", + "62 Ternera 2\n", "61 Aureliano Arcaya 2\n", - "60 Jack Brown 2\n", - "59 Alirio Noguera 2\n", - "63 Jose 2\n", + "60 Solomon 2\n", + "63 Alirio Noguera 2\n", "\n", "[71 rows x 2 columns]" ] }, - "execution_count": 3, + "execution_count": 5, "metadata": {}, "output_type": "execute_result" } @@ -252,7 +259,7 @@ }, { "cell_type": "code", - "execution_count": 4, + "execution_count": 6, "metadata": {}, "outputs": [], "source": [ @@ -265,7 +272,7 @@ }, { "cell_type": "code", - "execution_count": 5, + "execution_count": 7, "metadata": {}, "outputs": [], "source": [ @@ -275,7 +282,7 @@ "\n", "import numpy as np\n", "\n", - "output_file_path = \"One_Hundred_Years_of_Solitude.txt\"\n", + "output_file_path = \"One_Hundred_Years_of_Solitude_cleaned.txt\"\n", "\n", "with open(output_file_path, \"r\", encoding=\"utf-8\") as file:\n", " content = file.read()\n", @@ -329,7 +336,7 @@ }, { "cell_type": "code", - "execution_count": 6, + "execution_count": 8, "metadata": {}, "outputs": [ { @@ -341,7 +348,7 @@ "dtype: object" ] }, - "execution_count": 6, + "execution_count": 8, "metadata": {}, "output_type": "execute_result" } @@ -355,7 +362,7 @@ }, { "cell_type": "code", - "execution_count": 7, + "execution_count": 9, "metadata": {}, "outputs": [], "source": [ @@ -364,7 +371,91 @@ }, { "cell_type": "code", - "execution_count": 8, + "execution_count": 10, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
fromtoweight
2396071
2406091
24160112
2426141
24361547
\n", + "
" + ], + "text/plain": [ + " from to weight\n", + "239 60 7 1\n", + "240 60 9 1\n", + "241 60 11 2\n", + "242 61 4 1\n", + "243 61 54 7" + ] + }, + "execution_count": 10, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "edges.tail()" + ] + }, + { + "cell_type": "code", + "execution_count": 11, "metadata": {}, "outputs": [ { @@ -412,11 +503,11 @@ "application/vnd.holoviews_exec.v0+json": "", "text/html": [ "
\n", - "
\n", + "
\n", "
\n", "" + ] + }, + "metadata": { + "application/vnd.holoviews_exec.v0+json": { + "id": "9d6cd86f-1363-4505-8ee0-d204da0b21b6" + } + }, + "output_type": "display_data" + }, { "name": "stderr", "output_type": "stream", @@ -939,8 +1047,27 @@ } ], "source": [ - "renderer = hv.renderer(\"bokeh\")\n", - "renderer.save(chord, \"chord_diagram\")" + "import holoviews as hv\n", + "import panel as pn\n", + "\n", + "from bokeh.resources import INLINE, CDN\n", + "\n", + "pn.extension(raw_css=[\"html, body {background-color: #000000}\"])\n", + "\n", + "chord_pane = pn.panel(chord, name=\"Chord Diagram\")\n", + "\n", + "md_pane = pn.pane.Markdown(\n", + " \"## Chord Diagram of Character Co-occurrence in *One Hundred Years of Solitude*\",\n", + " styles={\"margin\": \"0.42em auto\", \"color\": \"#ffffff\"},\n", + ")\n", + "\n", + "page = pn.Column(\n", + " md_pane,\n", + " chord_pane,\n", + " styles={\"margin\": \"0 auto\"},\n", + ")\n", + "\n", + "page.save(\"pages/index.html\", resources=CDN)" ] } ], @@ -961,6 +1088,17 @@ "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.11.9" + }, + "polyglot_notebook": { + "kernelInfo": { + "defaultKernelName": "csharp", + "items": [ + { + "aliases": [], + "name": "csharp" + } + ] + } } }, "nbformat": 4, diff --git a/edges.csv b/edges.csv index d92b7ed..f8659db 100644 --- a/edges.csv +++ b/edges.csv @@ -4,13 +4,14 @@ from,to,weight 0,7,1 0,18,5 0,23,1 -0,27,3 -0,30,2 -0,45,1 +0,27,4 +0,29,2 +0,43,1 +0,44,1 1,0,1 1,3,5 1,5,2 -1,9,4 +1,8,4 1,10,1 1,11,1 1,12,2 @@ -18,48 +19,47 @@ from,to,weight 1,20,2 1,21,1 1,27,2 -1,30,7 -1,42,5 -2,5,2 -2,8,1 -2,14,1 -2,18,2 -2,19,2 -2,50,1 -2,51,3 -2,65,1 +1,29,7 +1,38,5 +2,5,4 +2,9,1 +2,14,2 +2,18,3 +2,19,3 +2,42,1 +2,57,3 +2,69,1 3,0,2 3,1,4 3,4,7 -3,5,10 -3,8,2 -3,9,1 +3,5,9 +3,8,1 +3,9,2 3,10,2 3,11,2 3,12,1 -3,14,1 -3,18,7 -3,24,1 +3,18,6 +3,25,1 3,27,3 -3,51,1 +3,57,1 4,0,1 4,3,7 4,5,3 -4,8,1 +4,9,1 4,11,16 4,12,1 4,14,1 4,18,3 4,19,1 -4,56,1 -4,59,1 +4,54,1 +4,61,1 5,0,1 -5,2,1 -5,3,11 +5,2,2 +5,3,10 5,4,2 5,7,2 -5,8,2 -5,10,12 +5,9,2 +5,10,13 5,11,5 5,12,3 5,14,1 @@ -67,42 +67,41 @@ from,to,weight 5,17,1 5,18,7 5,23,1 -5,25,1 -5,27,3 -5,30,4 -5,36,1 -5,47,1 -5,51,2 -5,53,1 -5,70,37 +5,24,1 +5,27,2 +5,29,4 +5,32,1 +5,55,1 +5,57,2 +5,60,37 7,5,3 -7,8,1 +7,9,1 7,10,1 -7,25,1 +7,24,1 7,27,1 -7,40,2 -7,70,1 -8,3,2 -8,4,4 -8,5,1 -8,7,1 -8,11,3 -8,18,2 -9,1,5 -9,3,1 -9,10,1 -9,11,2 -9,20,1 -9,26,3 -9,30,6 -9,42,1 +7,39,2 +7,60,1 +8,1,5 +8,3,1 +8,10,1 +8,11,2 +8,20,1 +8,26,3 +8,29,6 +8,38,1 +9,3,2 +9,4,4 +9,5,1 +9,7,1 +9,11,3 +9,18,2 10,3,2 -10,5,13 +10,5,14 10,7,1 -10,9,1 +10,8,1 10,15,1 10,27,3 -10,30,3 +10,29,3 11,1,1 11,3,2 11,4,15 @@ -113,15 +112,14 @@ from,to,weight 11,14,1 11,18,13 11,22,1 -11,30,4 -11,56,1 +11,29,4 +11,54,1 12,1,1 12,3,1 12,4,1 12,5,4 12,18,1 -14,2,1 -14,3,1 +14,2,2 14,4,1 14,5,1 14,11,1 @@ -133,114 +131,115 @@ from,to,weight 16,14,1 17,5,1 17,18,1 -17,51,1 +17,57,1 18,0,4 18,1,2 -18,2,2 -18,3,7 +18,2,3 +18,3,6 18,4,2 18,5,5 18,7,1 -18,8,2 -18,9,1 +18,8,1 +18,9,2 18,11,13 18,12,1 18,14,1 18,17,1 -18,19,1 +18,19,2 18,22,1 18,27,3 -18,30,6 -18,34,1 -18,51,2 +18,29,6 +18,35,1 +18,57,2 19,2,2 19,4,1 -19,18,1 -19,51,1 +19,18,2 +19,57,1 20,1,1 -20,9,1 +20,8,1 20,27,1 -22,45,3 -22,47,1 +22,43,3 +22,44,1 23,0,1 23,5,1 23,27,2 -23,30,1 -23,32,1 -23,35,1 -25,5,1 -25,7,1 -25,16,1 -25,24,1 -25,40,1 -26,9,2 +23,29,1 +23,34,1 +23,36,1 +24,5,1 +24,7,1 +24,16,1 +24,25,1 +24,39,1 +26,8,2 26,12,1 -27,0,3 +27,0,4 27,1,2 27,2,1 27,3,4 -27,5,2 +27,5,1 27,10,3 27,14,1 27,18,2 27,23,2 -27,24,1 +27,25,1 27,26,1 -27,30,1 +27,29,1 27,34,1 27,35,1 -27,42,1 -27,47,1 -27,49,1 -27,51,1 -30,0,1 -30,1,7 -30,3,1 -30,5,3 -30,9,5 -30,10,2 -30,11,4 -30,18,4 -30,20,1 -30,23,1 -30,27,1 -30,42,2 -32,23,1 -32,35,1 -34,18,1 +27,38,1 +27,44,1 +27,53,1 +27,57,1 +29,0,1 +29,1,7 +29,2,1 +29,3,1 +29,5,3 +29,8,5 +29,10,2 +29,11,4 +29,18,4 +29,20,1 +29,23,1 +29,27,1 +29,38,2 +32,5,1 34,27,1 +34,36,1 +35,18,1 35,27,1 -35,32,1 -36,5,1 -37,56,1 -40,7,2 -40,25,1 -42,1,9 -42,9,1 -42,18,1 -42,27,1 -43,3,1 -45,0,1 -45,22,3 -49,27,1 -50,2,1 -51,2,4 -51,3,1 -51,5,3 -51,17,1 -51,18,4 -51,19,1 -51,27,1 -51,50,1 -52,70,1 -53,5,1 -56,4,1 -56,37,1 -56,59,7 -57,70,1 -59,4,1 -59,56,7 -70,5,37 -70,7,1 -70,8,1 -70,11,2 +36,23,1 +36,34,1 +38,1,9 +38,8,1 +38,18,1 +38,27,1 +39,7,2 +39,24,1 +42,2,1 +43,0,1 +43,22,3 +47,54,1 +49,3,1 +51,60,1 +53,27,1 +54,4,1 +54,47,1 +54,61,7 +55,5,1 +56,60,1 +57,2,4 +57,3,1 +57,5,3 +57,17,1 +57,18,4 +57,19,1 +57,27,1 +57,42,1 +60,5,37 +60,7,1 +60,9,1 +60,11,2 +61,4,1 +61,54,7 diff --git a/nodes.csv b/nodes.csv index bdd06f4..34f0b6a 100644 --- a/nodes.csv +++ b/nodes.csv @@ -7,8 +7,8 @@ Rebeca,id_4 Aureliano,id_5 Jose Arcadio Segundo,id_6 Gerineldo Marquez,id_7 -Pietro Crespi,id_8 -Meme,id_9 +Meme,id_8 +Pietro Crespi,id_9 Amaranta Ursula,id_10 Amaranta,id_11 Pilar Ternera,id_12 @@ -23,50 +23,50 @@ Brown,id_20 Antonio Isabel,id_21 Riohacha,id_22 Gabriel,id_23 -Roque Carnicero,id_24 -Catarino,id_25 +Catarino,id_24 +Roque Carnicero,id_25 Mauricio Babilonia,id_26 Macondo,id_27 Patricia Brown,id_28 -Herbert,id_29 -Fernanda,id_30 +Fernanda,id_29 +Herbert,id_30 Don Fernando,id_31 -Alfonso,id_32 +Gerineldo,id_32 Aurelito,id_33 -Jose Raquel Moncada,id_34 -Alvaro,id_35 -Gerineldo,id_36 -Bruno Crespi,id_37 -Divine Providence,id_38 -Rebeca Buendia,id_39 -Magnifico Visbal,id_40 -Teofilo Vargas,id_41 -Petra Cotes,id_42 -Gavilan,id_43 -Saint Joseph,id_44 -Francis Drake,id_45 +Alvaro,id_34 +Jose Raquel Moncada,id_35 +Alfonso,id_36 +Saint Joseph,id_37 +Petra Cotes,id_38 +Magnifico Visbal,id_39 +Divine Providence,id_40 +Rebeca Buendia,id_41 +Visitacion,id_42 +Francis Drake,id_43 +Victorio Medina,id_44 +Petronio,id_45 Aureliano Centeno,id_46 -Victorio Medina,id_47 -Petronio,id_48 -Noguera,id_49 -Visitacion,id_50 -Melquiades,id_51 -Aquiles Ricardo,id_52 -Don Apolinar,id_53 -Gregorio Stevenson,id_54 -Angel,id_55 -Amparo,id_56 -Carmelita Montiel,id_57 -Nicanor Reyna,id_58 -Moscote,id_59 -Auerliano,id_60 -Argenida,id_61 -Fernanda del Carpio,id_62 -Crespi,id_63 -Ternera,id_64 -Solomon,id_65 +Bruno Crespi,id_47 +Teofilo Vargas,id_48 +Gavilan,id_49 +Nicanor Reyna,id_50 +Aquiles Ricardo,id_51 +Angel,id_52 +Noguera,id_53 +Amparo,id_54 +Don Apolinar,id_55 +Carmelita Montiel,id_56 +Melquiades,id_57 +Gregorio Stevenson,id_58 +Fernanda del Carpio,id_59 +Jose,id_60 +Moscote,id_61 +Jack Brown,id_62 +Auerliano,id_63 +Crespi,id_64 +Argenida,id_65 Amaranta Buendia,id_66 -Aureliano Arcaya,id_67 -Jack Brown,id_68 -Alirio Noguera,id_69 -Jose,id_70 +Ternera,id_67 +Aureliano Arcaya,id_68 +Solomon,id_69 +Alirio Noguera,id_70