Skip to content

Commit

Permalink
pipeline running locally
Browse files Browse the repository at this point in the history
  • Loading branch information
dcolinmorgan committed Mar 27, 2024
1 parent ef786b3 commit b523808
Show file tree
Hide file tree
Showing 4 changed files with 8 additions and 8 deletions.
3 changes: 1 addition & 2 deletions DOTS/feat.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,8 +9,7 @@
from sklearn.feature_extraction.text import CountVectorizer
from transformers import AutoModel, AutoTokenizer
import torch, spacy,nltk,subprocess, json, requests,string,csv,logging,os
-if __name__ == "__main__" and __package__ is None:
-__package__ = "DOTS"
+
from .scrape import get_OS_data, get_massive_OS_data, get_google_news, scrape_lobstr # need .scrape and .pull for production
from .pull import process_hit, process_data, pull_data, pull_lobstr_gdoc

Expand Down
2 changes: 1 addition & 1 deletion DOTS/input/lobstr_results.json

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion DOTS/input/runs.json
Original file line number Diff line number Diff line change
@@ -1 +1 @@
{"count":6,"limit":50,"page":1,"total_pages":1,"result_from":1,"result_to":7,"data":[{"id":"a9f051e3b2f44c6c99e8f81ff2eff552","object":"run","cluster":"8de6e1bbf33f47b8bce451075b883252","is_done":true,"started_at":"2024-03-27T02:38:24Z","total_results":0,"total_unique_results":0,"next_launch_at":null,"ended_at":"2024-03-27T02:46:28Z","duration":0.0,"credit_used":0.0,"origin":"user","status":"aborted","export_done":true,"export_count":0,"export_time":"2024-03-27T02:46:34Z","email_done":null,"email_time":null,"created_at":"2024-03-27T02:38:24Z","done_reason":"aborted","done_reason_desc":null},{"id":"b478b7b0fbfa4d7f861c45102d610f43","object":"run","cluster":"8de6e1bbf33f47b8bce451075b883252","is_done":true,"started_at":"2024-03-27T02:00:11Z","total_results":0,"total_unique_results":0,"next_launch_at":null,"ended_at":"2024-03-27T02:38:13Z","duration":0.0,"credit_used":0.0,"origin":"schedule","status":"done","export_done":true,"export_count":0,"export_time":"2024-03-27T02:38:32Z","email_done":null,"email_time":null,"created_at":"2024-03-27T02:00:11Z","done_reason":"deactivated","done_reason_desc":null},{"id":"a31a9c57e2aa4785b787fe046a215f7d","object":"run","cluster":"8de6e1bbf33f47b8bce451075b883252","is_done":true,"started_at":"2024-03-26T02:10:42Z","total_results":1793,"total_unique_results":1782,"next_launch_at":null,"ended_at":"2024-03-26T02:24:20Z","duration":632.024899999998,"credit_used":632.024899999998,"origin":"user","status":"done","export_done":true,"export_count":0,"export_time":"2024-03-26T02:24:27Z","email_done":null,"email_time":null,"created_at":"2024-03-26T02:10:42Z","done_reason":"no_credits_left","done_reason_desc":"No credits 
left."},{"id":"9d78be76b50a4183ae0a6bbf916a859e","object":"run","cluster":"8de6e1bbf33f47b8bce451075b883252","is_done":true,"started_at":"2024-03-26T02:09:08Z","total_results":111,"total_unique_results":111,"next_launch_at":null,"ended_at":"2024-03-26T02:09:58Z","duration":36.52109999999998,"credit_used":36.52109999999998,"origin":"user","status":"aborted","export_done":true,"export_count":0,"export_time":"2024-03-26T02:10:04Z","email_done":null,"email_time":null,"created_at":"2024-03-26T02:09:08Z","done_reason":"aborted","done_reason_desc":null},{"id":"2db39f206a22445fac5ab0bf89f43afa","object":"run","cluster":"8de6e1bbf33f47b8bce451075b883252","is_done":true,"started_at":"2024-03-26T02:00:02Z","total_results":636,"total_unique_results":637,"next_launch_at":null,"ended_at":"2024-03-26T02:05:17Z","duration":232.0251000000002,"credit_used":232.0251000000002,"origin":"schedule","status":"aborted","export_done":true,"export_count":0,"export_time":"2024-03-26T02:05:23Z","email_done":null,"email_time":null,"created_at":"2024-03-26T02:00:02Z","done_reason":"aborted","done_reason_desc":null},{"id":"60771d36a7fb4cbd8d6edbd2ed6375d6","object":"run","cluster":"8de6e1bbf33f47b8bce451075b883252","is_done":true,"started_at":"2024-03-25T01:53:14Z","total_results":3085,"total_unique_results":3078,"next_launch_at":null,"ended_at":"2024-03-25T02:12:51Z","duration":900.4967000000008,"credit_used":900.4967000000008,"origin":"user","status":"done","export_done":true,"export_count":0,"export_time":"2024-03-25T02:13:02Z","email_done":null,"email_time":null,"created_at":"2024-03-25T01:53:14Z","done_reason":"no_credits_left","done_reason_desc":"No credits left."}],"next":null,"previous":null}
{"count":8,"limit":50,"page":1,"total_pages":1,"result_from":1,"result_to":9,"data":[{"id":"583807ecf2304e869d2b3ec4a52879f3","object":"run","cluster":"8de6e1bbf33f47b8bce451075b883252","is_done":true,"started_at":"2024-03-27T07:03:48Z","total_results":1513,"total_unique_results":1510,"next_launch_at":null,"ended_at":"2024-03-27T08:17:34Z","duration":901.257399999999,"credit_used":901.257399999999,"origin":"user","status":"done","export_done":true,"export_count":0,"export_time":"2024-03-27T08:17:45Z","email_done":null,"email_time":null,"created_at":"2024-03-27T07:03:48Z","done_reason":"no_credits_left","done_reason_desc":"No credits left."},{"id":"856dbf0ca7db4c27b88d86bb39a1c718","object":"run","cluster":"8de6e1bbf33f47b8bce451075b883252","is_done":true,"started_at":"2024-03-27T03:21:02Z","total_results":0,"total_unique_results":0,"next_launch_at":null,"ended_at":"2024-03-27T03:50:48Z","duration":0.0,"credit_used":0.0,"origin":"user","status":"aborted","export_done":true,"export_count":0,"export_time":"2024-03-27T03:50:55Z","email_done":null,"email_time":null,"created_at":"2024-03-27T03:21:02Z","done_reason":"aborted","done_reason_desc":null},{"id":"a9f051e3b2f44c6c99e8f81ff2eff552","object":"run","cluster":"8de6e1bbf33f47b8bce451075b883252","is_done":true,"started_at":"2024-03-27T02:38:24Z","total_results":0,"total_unique_results":0,"next_launch_at":null,"ended_at":"2024-03-27T02:46:28Z","duration":0.0,"credit_used":0.0,"origin":"user","status":"aborted","export_done":true,"export_count":0,"export_time":"2024-03-27T02:46:34Z","email_done":null,"email_time":null,"created_at":"2024-03-27T02:38:24Z","done_reason":"aborted","done_reason_desc":null},{"id":"b478b7b0fbfa4d7f861c45102d610f43","object":"run","cluster":"8de6e1bbf33f47b8bce451075b883252","is_done":true,"started_at":"2024-03-27T02:00:11Z","total_results":0,"total_unique_results":0,"next_launch_at":null,"ended_at":"2024-03-27T02:38:13Z","duration":0.0,"credit_used":0.0,"origin":"schedule","status":"done","expo
rt_done":true,"export_count":0,"export_time":"2024-03-27T02:38:32Z","email_done":null,"email_time":null,"created_at":"2024-03-27T02:00:11Z","done_reason":"deactivated","done_reason_desc":null},{"id":"a31a9c57e2aa4785b787fe046a215f7d","object":"run","cluster":"8de6e1bbf33f47b8bce451075b883252","is_done":true,"started_at":"2024-03-26T02:10:42Z","total_results":1793,"total_unique_results":1782,"next_launch_at":null,"ended_at":"2024-03-26T02:24:20Z","duration":632.024899999998,"credit_used":632.024899999998,"origin":"user","status":"done","export_done":true,"export_count":0,"export_time":"2024-03-26T02:24:27Z","email_done":null,"email_time":null,"created_at":"2024-03-26T02:10:42Z","done_reason":"no_credits_left","done_reason_desc":"No credits left."},{"id":"9d78be76b50a4183ae0a6bbf916a859e","object":"run","cluster":"8de6e1bbf33f47b8bce451075b883252","is_done":true,"started_at":"2024-03-26T02:09:08Z","total_results":111,"total_unique_results":111,"next_launch_at":null,"ended_at":"2024-03-26T02:09:58Z","duration":36.52109999999998,"credit_used":36.52109999999998,"origin":"user","status":"aborted","export_done":true,"export_count":0,"export_time":"2024-03-26T02:10:04Z","email_done":null,"email_time":null,"created_at":"2024-03-26T02:09:08Z","done_reason":"aborted","done_reason_desc":null},{"id":"2db39f206a22445fac5ab0bf89f43afa","object":"run","cluster":"8de6e1bbf33f47b8bce451075b883252","is_done":true,"started_at":"2024-03-26T02:00:02Z","total_results":636,"total_unique_results":637,"next_launch_at":null,"ended_at":"2024-03-26T02:05:17Z","duration":232.0251000000002,"credit_used":232.0251000000002,"origin":"schedule","status":"aborted","export_done":true,"export_count":0,"export_time":"2024-03-26T02:05:23Z","email_done":null,"email_time":null,"created_at":"2024-03-26T02:00:02Z","done_reason":"aborted","done_reason_desc":null},{"id":"60771d36a7fb4cbd8d6edbd2ed6375d6","object":"run","cluster":"8de6e1bbf33f47b8bce451075b883252","is_done":true,"started_at":"2024-03-25T01:53:14
Z","total_results":3085,"total_unique_results":3078,"next_launch_at":null,"ended_at":"2024-03-25T02:12:51Z","duration":900.4967000000008,"credit_used":900.4967000000008,"origin":"user","status":"done","export_done":true,"export_count":0,"export_time":"2024-03-25T02:13:02Z","email_done":null,"email_time":null,"created_at":"2024-03-25T01:53:14Z","done_reason":"no_credits_left","done_reason_desc":"No credits left."}],"next":null,"previous":null}
9 changes: 5 additions & 4 deletions DOTS/scrape.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
from tqdm import tqdm
from datetime import datetime, timedelta
from gnews import GNews
+import pandas as pd
import xml.etree.ElementTree as ET
load_dotenv()
os_url = os.getenv('OS_TOKEN')
Expand Down Expand Up @@ -112,10 +113,10 @@ def scrape_lobstr():
'curl', 'https://api.lobstr.io/v1/runs?page=1&page_size=3000',
'-H', 'Accept: application/json',
'-H', f"Authorization: Token {lobstr_key}",
-'-o', 'input/runs.json'
+'-o', 'DOTS/input/runs.json'
])

-with open("input/runs.json", 'r') as f:
+with open("DOTS/input/runs.json", 'r') as f:
runs = json.load(f)
juns=pd.DataFrame(runs['data'])
AA=juns[['id','cluster','total_unique_results']]
Expand All @@ -125,10 +126,10 @@ def scrape_lobstr():
'curl', f"https://api.lobstr.io/v1/results?cluster=8de6e1bbf33f47b8bce451075b883252&run={latest_success_run['id']}&page=1&page_size=3000",
'-H', 'Accept: application/json',
'-H', f"Authorization: Token {lobstr_key}",
-'-o', 'input/lobstr_results.json'
+'-o', 'DOTS/input/lobstr_results.json'
])

-with open("input/lobstr_results.json", 'r') as f:
+with open("DOTS/input/lobstr_results.json", 'r') as f:
data = json.load(f)

jata=pd.DataFrame(data['data'])
Expand Down

0 comments on commit b523808

Please sign in to comment.