-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathExtractTags.py
43 lines (32 loc) · 1.33 KB
/
ExtractTags.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
import os
import csv
import xml.etree.ElementTree as ET
# Define the root directory path
rootdir = 'K:\Contenido Catalogo'
# Create an empty list to store the tags
tagsList = []
# Iterate over each file and directory inside the root directory
for subdir, dirs, files in os.walk(rootdir):
for file in files:
# Check if the file is an XML file
if file.endswith('.xml') and not file.startswith('Logs'):
# Combine the directory path and file name to get the full file path
filepath = os.path.join(subdir, file)
# Parse the XML file
tree = ET.parse(filepath)
root = tree.getroot()
# Check if the "Tags" element exists in the XML file
tagsElem = root.find('Tags')
if tagsElem is not None and tagsElem.text is not None:
tags = tagsElem.text.split(',')
# Add tags to the tags list
for tag in tags:
tagsList.append(tag.strip().replace(' ', '-'))
# Sort the tags list
tagsList.sort()
# Export the tags list to a CSV file, removing duplicates
with open('.\Data\Tags.csv', 'w', newline='', encoding='utf-8') as csvfile:
writer = csv.writer(csvfile, quoting=csv.QUOTE_NONNUMERIC)
writer.writerow(['tag'])
for item in sorted(set(tagsList)):
writer.writerow([item])