Skip to content

Commit

Permalink
add NCEI sitemap
Browse files Browse the repository at this point in the history
  • Loading branch information
jmckenna committed Jan 11, 2024
1 parent 095edf6 commit a446b34
Show file tree
Hide file tree
Showing 2 changed files with 259 additions and 0 deletions.
76 changes: 76 additions & 0 deletions collection/scripts/ncei-sitemap.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,76 @@
#!/usr/bin/env python

"""
Purpose: Standalone script to generate a sitemap.xml from a CSV,
where the CSV was generated from a data export
from the Marine Microplastics viewer
https://www.ncei.noaa.gov/products/microplastics
Usage: python ncei-sitemap.py
Output: sitemap.xml
Requires: Python 3.x
Note: the url <loc> value must be unique, for the sitemap to be valid
(meaning: you cannot have duplicate urls listed inside
the sitemap.xml)
"""

# define common variables
# CSV_FILENAME: the CSV export that is read as input; the script looks up
# its 'Accession Link' column to collect the record URLs.
CSV_FILENAME = "MarineMicroplastics-2024-01-10.csv"
# NEW_SITEMAP_FILENAME: intended file name of the generated sitemap.
NEW_SITEMAP_FILENAME = "sitemap.xml"

"""
#########################
# you shouldn't have to modify anything below
#########################
"""

import csv
import os, sys
import datetime

# Today's local date as an ISO 8601 string (YYYY-MM-DD); written as the
# <lastmod> value of every sitemap entry.
lastmod_date = datetime.date.today().isoformat()

# function to print one sitemap <url> entry (<loc> + <lastmod>) to stdout
def printlink(url, lastmod=None):
    """Print a single sitemap ``<url>`` element to standard output.

    Args:
        url: Address written inside ``<loc>``. XML special characters
            (``&``, ``<``, ``>``) are escaped so that URLs with query
            strings do not produce malformed XML.
        lastmod: Date string written inside ``<lastmod>``. When omitted
            (``None``), the module-level ``lastmod_date`` is used.
    """
    # Local import keeps this function self-contained.
    from xml.sax.saxutils import escape

    if lastmod is None:
        lastmod = lastmod_date

    print(" <url>")
    print(" <loc>" + escape(url) + "</loc>")
    print(" <lastmod>" + lastmod + "</lastmod>")
    print(" </url>")

# Read the CSV *before* touching the output file, so that a missing or
# malformed CSV does not leave a truncated sitemap behind.
# A Python set is used to force unique values (duplicate <loc> entries
# would make the sitemap invalid).
urls = set()

#open existing CSV for reading
with open(CSV_FILENAME, newline='') as csvfile:
    reader = csv.DictReader(csvfile, delimiter=',', quotechar='"', quoting=csv.QUOTE_MINIMAL)

    #loop through all records
    for row in reader:
        link = row['Accession Link']
        # skip rows with no link (empty cell, or None for a short row)
        if link and link.strip():
            urls.add(link.strip())

original_stdout = sys.stdout # Save a reference to the original standard output

#create the sitemap; the XML declaration promises UTF-8, so write UTF-8
with open(NEW_SITEMAP_FILENAME, 'w', newline='\n', encoding='utf-8') as f:
    sys.stdout = f # printlink() writes to stdout, so point stdout at the file
    try:
        print('<?xml version="1.0" encoding="UTF-8"?>')
        # the sitemap protocol namespace is an exact string that uses http://
        print('<urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">')

        #iterate through unique record urls, sorted so the output is
        #identical from run to run for the same input
        for val in sorted(urls):
            printlink(val)

        print('</urlset>')
    finally:
        # always restore stdout, even if writing an entry fails
        sys.stdout = original_stdout

print("\n")
print("************************")
print(" " + str(len(urls)) + " records exported to " + NEW_SITEMAP_FILENAME)
print("************************")
print("\n")
183 changes: 183 additions & 0 deletions collection/tempHosting/data-ncei/sitemap.xml
Original file line number Diff line number Diff line change
@@ -0,0 +1,183 @@
<?xml version="1.0" encoding="UTF-8"?>
<urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">
<url>
<loc>https://www.ncei.noaa.gov/access/metadata/landing-page/bin/iso?id=gov.noaa.nodc:0285700</loc>
<lastmod>2024-01-10</lastmod>
</url>
<url>
<loc>https://www.ncei.noaa.gov/access/metadata/landing-page/bin/iso?id=gov.noaa.nodc:211009</loc>
<lastmod>2024-01-10</lastmod>
</url>
<url>
<loc>https://www.ncei.noaa.gov/access/metadata/landing-page/bin/iso?id=gov.noaa.nodc:270520</loc>
<lastmod>2024-01-10</lastmod>
</url>
<url>
<loc>https://www.ncei.noaa.gov/access/metadata/landing-page/bin/iso?id=gov.noaa.nodc:211008</loc>
<lastmod>2024-01-10</lastmod>
</url>
<url>
<loc>https://www.ncei.noaa.gov/access/metadata/landing-page/bin/iso?id=gov.noaa.nodc:280519</loc>
<lastmod>2024-01-10</lastmod>
</url>
<url>
<loc>https://www.ncei.noaa.gov/access/metadata/landing-page/bin/iso?id=gov.noaa.nodc:276018</loc>
<lastmod>2024-01-10</lastmod>
</url>
<url>
<loc>https://www.ncei.noaa.gov/access/metadata/landing-page/bin/iso?id=gov.noaa.nodc:277496</loc>
<lastmod>2024-01-10</lastmod>
</url>
<url>
<loc>https://www.ncei.noaa.gov/access/metadata/landing-page/bin/iso?id=gov.noaa.nodc:0259486</loc>
<lastmod>2024-01-10</lastmod>
</url>
<url>
<loc>https://www.ncei.noaa.gov/access/metadata/landing-page/bin/iso?id=gov.noaa.nodc:276422</loc>
<lastmod>2024-01-10</lastmod>
</url>
<url>
<loc>https://www.ncei.noaa.gov/access/metadata/landing-page/bin/iso?id=gov.noaa.nodc:276265</loc>
<lastmod>2024-01-10</lastmod>
</url>
<url>
<loc>https://www.ncei.noaa.gov/access/metadata/landing-page/bin/iso?id=gov.noaa.nodc:276482</loc>
<lastmod>2024-01-10</lastmod>
</url>
<url>
<loc>https://www.ncei.noaa.gov/access/metadata/landing-page/bin/iso?id=gov.noaa.nodc:0253450</loc>
<lastmod>2024-01-10</lastmod>
</url>
<url>
<loc>https://www.ncei.noaa.gov/access/metadata/landing-page/bin/iso?id=gov.noaa.nodc:0170967</loc>
<lastmod>2024-01-10</lastmod>
</url>
<url>
<loc>https://www.ncei.noaa.gov/access/metadata/landing-page/bin/iso?id=gov.noaa.nodc:276019</loc>
<lastmod>2024-01-10</lastmod>
</url>
<url>
<loc>https://www.ncei.noaa.gov/access/metadata/landing-page/bin/iso?id=gov.noaa.nodc:270530</loc>
<lastmod>2024-01-10</lastmod>
</url>
<url>
<loc>https://www.ncei.noaa.gov/access/metadata/landing-page/bin/iso?id=gov.noaa.nodc:276263</loc>
<lastmod>2024-01-10</lastmod>
</url>
<url>
<loc>https://www.ncei.noaa.gov/access/metadata/landing-page/bin/iso?id=gov.noaa.nodc:275966</loc>
<lastmod>2024-01-10</lastmod>
</url>
<url>
<loc>https://www.ncei.noaa.gov/access/metadata/landing-page/bin/iso?id=gov.noaa.nodc:276266</loc>
<lastmod>2024-01-10</lastmod>
</url>
<url>
<loc>https://www.ncei.noaa.gov/access/metadata/landing-page/bin/iso?id=gov.noaa.nodc:0253447</loc>
<lastmod>2024-01-10</lastmod>
</url>
<url>
<loc>https://www.ncei.noaa.gov/access/metadata/landing-page/bin/iso?id=gov.noaa.nodc:279326</loc>
<lastmod>2024-01-10</lastmod>
</url>
<url>
<loc>https://www.ncei.noaa.gov/access/metadata/landing-page/bin/iso?id=gov.noaa.nodc:276017</loc>
<lastmod>2024-01-10</lastmod>
</url>
<url>
<loc>https://www.ncei.noaa.gov/access/metadata/landing-page/bin/iso?id=gov.noaa.nodc:279322</loc>
<lastmod>2024-01-10</lastmod>
</url>
<url>
<loc>https://www.ncei.noaa.gov/access/metadata/landing-page/bin/iso?id=gov.noaa.nodc:0253448</loc>
<lastmod>2024-01-10</lastmod>
</url>
<url>
<loc>https://www.ncei.noaa.gov/access/metadata/landing-page/bin/iso?id=gov.noaa.nodc:276020</loc>
<lastmod>2024-01-10</lastmod>
</url>
<url>
<loc>https://www.ncei.noaa.gov/access/metadata/landing-page/bin/iso?id=gov.noaa.nodc:259524</loc>
<lastmod>2024-01-10</lastmod>
</url>
<url>
<loc>https://www.ncei.noaa.gov/access/metadata/landing-page/bin/iso?id=gov.noaa.nodc:277995</loc>
<lastmod>2024-01-10</lastmod>
</url>
<url>
<loc>https://www.ncei.noaa.gov/access/metadata/landing-page/bin/iso?id=gov.noaa.nodc:277497</loc>
<lastmod>2024-01-10</lastmod>
</url>
<url>
<loc>https://www.ncei.noaa.gov/access/metadata/landing-page/bin/iso?id=gov.noaa.nodc:281206</loc>
<lastmod>2024-01-10</lastmod>
</url>
<url>
<loc>https://www.ncei.noaa.gov/access/metadata/landing-page/bin/iso?id=gov.noaa.nodc:0253923</loc>
<lastmod>2024-01-10</lastmod>
</url>
<url>
<loc>https://www.ncei.noaa.gov/access/metadata/landing-page/bin/iso?id=gov.noaa.nodc:276264</loc>
<lastmod>2024-01-10</lastmod>
</url>
<url>
<loc>https://www.ncei.noaa.gov/access/metadata/landing-page/bin/iso?id=gov.noaa.nodc:279324</loc>
<lastmod>2024-01-10</lastmod>
</url>
<url>
<loc>https://www.ncei.noaa.gov/access/metadata/landing-page/bin/iso?id=gov.noaa.nodc:259692</loc>
<lastmod>2024-01-10</lastmod>
</url>
<url>
<loc>https://www.ncei.noaa.gov/access/metadata/landing-page/bin/iso?id=gov.noaa.nodc:279323</loc>
<lastmod>2024-01-10</lastmod>
</url>
<url>
<loc>https://www.ncei.noaa.gov/access/metadata/landing-page/bin/iso?id=gov.noaa.nodc:259525</loc>
<lastmod>2024-01-10</lastmod>
</url>
<url>
<loc>https://www.ncei.noaa.gov/access/metadata/landing-page/bin/iso?id=gov.noaa.nodc:278002</loc>
<lastmod>2024-01-10</lastmod>
</url>
<url>
<loc>https://www.ncei.noaa.gov/access/metadata/landing-page/bin/iso?id=gov.noaa.nodc:0253140</loc>
<lastmod>2024-01-10</lastmod>
</url>
<url>
<loc>https://www.ncei.noaa.gov/access/metadata/landing-page/bin/iso?id=gov.noaa.nodc:0278270</loc>
<lastmod>2024-01-10</lastmod>
</url>
<url>
<loc>https://www.ncei.noaa.gov/access/metadata/landing-page/bin/iso?id=gov.noaa.nodc:275968</loc>
<lastmod>2024-01-10</lastmod>
</url>
<url>
<loc>https://www.ncei.noaa.gov/access/metadata/landing-page/bin/iso?id=gov.noaa.nodc:270542</loc>
<lastmod>2024-01-10</lastmod>
</url>
<url>
<loc>https://www.ncei.noaa.gov/access/metadata/landing-page/bin/iso?id=gov.noaa.nodc:211007</loc>
<lastmod>2024-01-10</lastmod>
</url>
<url>
<loc>https://www.ncei.noaa.gov/access/metadata/landing-page/bin/iso?id=gov.noaa.nodc:0277839</loc>
<lastmod>2024-01-10</lastmod>
</url>
<url>
<loc>https://www.ncei.noaa.gov/access/metadata/landing-page/bin/iso?id=gov.noaa.nodc:0278269</loc>
<lastmod>2024-01-10</lastmod>
</url>
<url>
<loc>https://www.ncei.noaa.gov/access/metadata/landing-page/bin/iso?id=gov.noaa.nodc:275967</loc>
<lastmod>2024-01-10</lastmod>
</url>
<url>
<loc>https://www.ncei.noaa.gov/access/metadata/landing-page/bin/iso?id=gov.noaa.nodc:284765</loc>
<lastmod>2024-01-10</lastmod>
</url>
<url>
<loc>https://www.ncei.noaa.gov/access/metadata/landing-page/bin/iso?id=gov.noaa.nodc:270528</loc>
<lastmod>2024-01-10</lastmod>
</url>
</urlset>

0 comments on commit a446b34

Please sign in to comment.