Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feature/zendesk #47

Open
wants to merge 6 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
154 changes: 154 additions & 0 deletions app/data_sources/zendesk.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,154 @@
import logging
from datetime import datetime
from typing import Dict, List
from data_source_api.exception import InvalidDataSourceConfig
from data_source_api.basic_document import BasicDocument, DocumentType
from data_source_api.base_data_source import BaseDataSource, ConfigField, HTMLInputType
from indexing_queue import IndexingQueue

import re
import json
import base64
import requests
from parsers.html import html_to_text

logger = logging.getLogger(__name__)


class ZendeskDataSource(BaseDataSource):

cached_users={}
cached_sections={}
cached_categories={}

@staticmethod
def get_config_fields() -> List[ConfigField]:
"""
Returns a list of fields that are required to configure the data source for UI.
for example:
"""
return [
ConfigField(label="BaseUrl", name="baseurl", type="text", placeholder="https://example.zendesk.com"),
ConfigField(label="Email", name="email", type="text", placeholder="[email protected]"),
ConfigField(label="Token", name="token", type="password", placeholder="paste-your-token-here",input_type=HTMLInputType.PASSWORD)
]


@staticmethod
def validate_config(config: Dict) -> None:
"""
Validates the config and raises an exception if it's invalid.
"""
auth=base64.b64encode(bytes('{}/token:{}'.format(config['email'], config['token']), 'utf-8')) # bytes
headers = {'Authorization': 'Basic {}'.format(auth.decode("utf-8"))}
response=requests.get('{}/api/v2/help_center/en-us/articles'.format(config['baseurl']), headers=headers)
if response.status_code != 200:
logger.error("Failed to call Zendesk API. Responsecode: {}".format(response.status_code))
logger.error(response.content)
raise InvalidDataSourceConfig

@staticmethod
def _is_valid_message(message: Dict) -> bool:
return 'client_msg_id' in message

@classmethod
def get_display_name(cls) -> str:
"""
Returns the display name of the data source, change GoogleDriveDataSource to Google Drive.
"""
pascal_case_source = cls.__name__.replace("DataSource", "")
words = re.findall('[A-Z][^A-Z]*', pascal_case_source)
return " ".join(words)

def auth_headers(self):
auth=base64.b64encode(bytes('{}/token:{}'.format(self._config['email'], self._config['token']), 'utf-8')) # bytes
return {'Authorization': 'Basic {}'.format(auth.decode("utf-8"))}

def zendesk_get(self, endpoint):
return requests.get('{}{}'.format(self._config['baseurl'], endpoint), headers=self.auth_headers())

def zendesk_get_user(self, userid):
if not userid in self.cached_users:
user=self.zendesk_get('/api/v2/users/{}'.format(userid))
parsedUser=json.loads(user.content)
self.cached_users[userid] = parsedUser['user']

return self.cached_users[userid]

def zendesk_get_section(self, sectionid):
if not sectionid in self.cached_sections:
section=self.zendesk_get('/api/v2/help_center/sections/{}'.format(sectionid))
parsedSection=json.loads(section.content)
self.cached_sections[sectionid] = parsedSection['section']

return self.cached_sections[sectionid]

def zendesk_get_category(self, categoryid):
if not categoryid in self.cached_categories:
category=self.zendesk_get('/api/v2/help_center/categories/{}'.format(categoryid))
parsedCategory=json.loads(category.content)
self.cached_categories[categoryid] = parsedCategory['category']

return self.cached_categories[categoryid]

def _feed_new_documents(self) -> None:
"""
Feeds the indexing queue with new documents.
"""
total_fed = 0
current_page=1
while True:
response=self.zendesk_get('/api/v2/help_center/en-us/articles?page={}'.format(current_page))
articles=json.loads(response.content)

if current_page == 1:
logger.info("Found {} articles.".format(articles['count']))

parsed_docs = []
for article in articles['articles']:
last_modified = datetime.strptime(article['updated_at'], "%Y-%m-%dT%H:%M:%SZ")
if last_modified < self._last_index_time:
continue

doc_id = article['id']
plain_text = html_to_text(article['body'])
author=self.zendesk_get_user(article['author_id'])
author_image_url = ''
if 'photo' in author and author['photo'] != None:
if 'content_url' in author['photo'] and author['photo']['content_url'] != None:
author_image_url = author['photo']['content_url']

section=self.zendesk_get_section(article['section_id'])
category=self.zendesk_get_category(section['category_id'])

parsed_docs.append(BasicDocument(title=article['title'],
content=plain_text,
author=author['name'],
author_image_url=author_image_url,
timestamp=last_modified,
id=doc_id,
data_source_id=self._data_source_id,
location=category['name'],
url=article['html_url'],
type=DocumentType.DOCUMENT))

total_fed += len(parsed_docs)
IndexingQueue.get().feed(docs=parsed_docs)
logger.info("Processed page {}/{}. Sent {} for indexing".format(articles['page'],articles['page_count'],len(parsed_docs)))
parsed_docs = []

if articles['page'] == articles['page_count']:
break

current_page+=1



def __init__(self, config: Dict, data_source_id: int, last_index_time: datetime = None) -> None:
self._config = config
self._data_source_id = data_source_id

if last_index_time is None:
last_index_time = datetime(2012, 1, 1)
self._last_index_time = last_index_time

Binary file added app/static/data_source_icons/zendesk.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
11 changes: 11 additions & 0 deletions ui/src/components/data-source-panel.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -260,6 +260,17 @@ export default class DataSourcePanel extends React.Component<DataSourcePanelProp
)
}

{this.state.selectedDataSource.value === 'zendesk' && (
<span>
<span className="leading-9 text-lg text-white">
Follow <a href='https://support.zendesk.com/hc/en-us/articles/4408889192858-Generating-a-new-API-token' rel="noreferrer" className="inline underline" target="_blank">these instructions</a> to create a token.
</span>
<br />
<span className="leading-9 text-lg text-white">
Email should be of a user with access to all relevant articles
</span>
</span>
)}
{this.state.selectedDataSource.value === 'google_drive' && (
<span className="leading-9 text-lg text-white">
Follow <a href='https://github.com/GerevAI/gerev/blob/main/docs/data-sources/google-drive/google-drive.md' rel="noreferrer" className="inline underline" target="_blank">these instructions</a>
Expand Down