Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Rerun Ghana interviews #260

Merged
merged 1 commit into from
Sep 26, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 3 additions & 3 deletions belief_pipeline/tpi_main.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,9 +19,9 @@ def get_in_and_out() -> Tuple[str, str]:
if __name__ == "__main__":
belief_model_name: str = "maxaalexeeva/belief-classifier_mturk_unmarked-trigger_bert-base-cased_2023-4-26-0-34"
sentiment_model_name: str = "hriaz/finetuned_beliefs_sentiment_classifier_experiment1"
locations_file_name: str = "./belief_pipeline/UG.tsv"
input_file_name: str = "../corpora/interviews/uganda/uganda-interviews.tsv"
output_file_name: str = "../corpora/interviews/uganda/uganda-interviews-a.tsv"
locations_file_name: str = "./belief_pipeline/GH.tsv"
input_file_name: str = "../corpora/interviews/ghana/ghana-interviews.tsv"
output_file_name: str = "../corpora/interviews/ghana/ghana-interviews-a.tsv"
# input_file_name, output_file_name = get_in_and_out()
pipeline = Pipeline(
TpiInputStage(input_file_name),
Expand Down
66 changes: 33 additions & 33 deletions scraper/corpora/interviews/ghana/articlecorpus.txt
Original file line number Diff line number Diff line change
@@ -1,33 +1,33 @@
file:/Transcript GHA-0001 Gatekeeper Binsere.docx
file:/Transcript GHA-0002 Men FGD Community Mapping Binsere.docx
file:/Transcript GHA-0003 Women FGD Community Mapping Binsere.docx
file:/Transcript GHA-0005 Men FGD Pairwise Ranking Binsere.docx
file:/Transcript GHA-0006 Women FGD Pairwise Ranking Binsere.docx
file:/Transcript GHA-0007 Gatekeeper Mameriwa.docx
file:/Transcript GHA-0008 Men FGD Community Mapping Mameriwa.docx
file:/Transcript GHA-0009 Women FGD Community Mapping Mameriwa.docx
file:/Transcript GHA-0011 Men FGD Pairwise Ranking Mameriwa.docx
file:/Transcript GHA-0012 Women FGD Pairwise Ranking Mameriwa.docx
file:/Transcript GHA-0013 Gatekeeper Manso Adubia.docx
file:/Transcript GHA-0014 Men FGD Community Mapping Manso Abudia.docx
file:/Transcript GHA-0015 Community Resource Mapping Women FGD Manso Adubia.docx
file:/Transcript GHA-0017 Men FGD Pairwise Ranking Manso Adubia.docx
file:/Transcript GHA-0018 Women FGD Pairwise Ranking Manso Adubia.docx
file:/Transcript GHA-0019 Gatekeeper Essase Bontefufuo.docx
file:/Transcript GHA-0020 Men FGD Community Map Essase Bontefufuo.docx
file:/Transcript GHA-0021 Women FGD Community Mapping Essase Bontefufuo.docx
file:/Transcript GHA-0023 Men FGD Pairwise Ranking Essase Bontefufuo.docx
file:/Transcript GHA-0024 Women FGD Pairwise Ranking Essase Bontefufuo.docx
file:/Transcript GHA-0025 Gatekeeper Dompoase.docx
file:/Transcript GHA-0026 Men FGD Community Mapping Dompoase.docx
file:/Transcript GHA-0027 Women FGD Community Mapping Dompoase.docx
file:/Transcript GHA-0029 Men FGD Pairwise Dompoase.docx
file:/Transcript GHA-0030 Women FGD Pairwise Dompoase.docx
file:/Transcript GHA-E0001 National expert.docx
file:/Transcript GHA-E0002 National expert (Univ of Ghana professor).docx
file:/Transcript GHA-E0003 National expert (reporter).docx
file:/Transcript GHA-E0004 Local Expert Obuasi Municipal.docx
file:/Transcript GHA-E0008 Local Expert Adansi North.docx
file:/Transcript GHA-E0009 Local Experts Adansi North.docx
file:/Transcript GHA-E0010 Local Expert Kumasi Metro.docx
file:/Transcript GHA-E0011 Local Expert Kumasi Metro.docx
file:/Amansie South District (Manso Abudia)/Transcript GHA-0013 Gatekeeper Manso Adubia Amansie South.docx
file:/Amansie South District (Manso Abudia)/Transcript GHA-0014 Men FGD Community Mapping Manso Abudia Amansie South.docx
file:/Amansie South District (Manso Abudia)/Transcript GHA-0015 Community Resource Mapping Women FGD Manso Adubia Amansie South.docx
file:/Amansie South District (Manso Abudia)/Transcript GHA-0017 Men FGD Pairwise Ranking Manso Adubia, Amansie South.docx
file:/Amansie South District (Manso Abudia)/Transcript GHA-0018 Women FGD Pairwise Ranking Manso Adubia Amansie South.docx
file:/Amansi North District (Dompoase)/Transcript GHA-0025 Gatekeeper Dompoase Adansi North.docx
file:/Amansi North District (Dompoase)/Transcript GHA-0030 Women FGD Pairwise Dompoase, Adansi North.docx
file:/Amansi North District (Dompoase)/Transcript GHA-0026 Men FGD Community Mapping Dompoase Adansi North.docx
file:/Amansi North District (Dompoase)/Transcript GHA-E0008 Local Expert Adansi North.docx
file:/Amansi North District (Dompoase)/Transcript GHA-0027 Women FGD Community Mapping Dompoase, Adansi North.docx
file:/Amansi North District (Dompoase)/Transcript GHA-E0009 Local Experts Adansi North.docx
file:/Amansi North District (Dompoase)/Transcript GHA-0029 Men FGD Pairwise Dompoase, Adansi North.docx
file:/Amansi West District (Essase B.)/Transcript GHA-0019 Gatekeeper Essase Bontefufuo, Amansie West.docx
file:/Amansi West District (Essase B.)/Transcript GHA-0020 Men FGD Community Map Essase Bontefufuo Amansie West.docx
file:/Amansi West District (Essase B.)/Transcript GHA-0021 Women FGD Community Mapping Essase Bontefufuo Amansie West.docx
file:/Amansi West District (Essase B.)/Transcript GHA-0023 Men FGD Pairwise Ranking Essase Bontefufuo Amansie West.docx
file:/Amansi West District (Essase B.)/Transcript GHA-0024 Women FGD Pairwise Ranking Essase Bontefufuo Amansie West.docx
file:/Kumasi/Transcript GHA-E0001 National expert Ashanti Region.docx
file:/Kumasi/Transcript GHA-E0010 Local Expert Kumasi Metro Ashanti Region.docx
file:/Kumasi/Transcript GHA-E0002 National expert (Univ of Ghana professor) Ashanti Region.docx
file:/Kumasi/Transcript GHA-E0011 Local Expert Kumasi Metro Ashanti Region.docx
file:/Kumasi/Transcript GHA-E0003 National expert (reporter) Ashanti Region.docx
file:/Obuasi District (Binsere and Mameriva)/Transcript GHA-0001 Gatekeeper Binsere Obuasi.docx
file:/Obuasi District (Binsere and Mameriva)/Transcript GHA-0008 Men FGD Community Mapping Mameriwa Obuasi.docx
file:/Obuasi District (Binsere and Mameriva)/Transcript GHA-0002 Men FGD Community Mapping Binsere Obuasi.docx
file:/Obuasi District (Binsere and Mameriva)/Transcript GHA-0009 Women FGD Community Mapping Mameriwa Obuasi.docx
file:/Obuasi District (Binsere and Mameriva)/Transcript GHA-0003 Women FGD Community Mapping Binsere Obuasi.docx
file:/Obuasi District (Binsere and Mameriva)/Transcript GHA-0011 Men FGD Pairwise Ranking Mameriwa Obuasi.docx
file:/Obuasi District (Binsere and Mameriva)/Transcript GHA-0005 Men FGD Pairwise Ranking Binsere Obuasi.docx
file:/Obuasi District (Binsere and Mameriva)/Transcript GHA-0012 Women FGD Pairwise Ranking Mameriwa Obuasi.docx
file:/Obuasi District (Binsere and Mameriva)/Transcript GHA-0006 Women FGD Pairwise Ranking Binsere Obuasi.docx
file:/Obuasi District (Binsere and Mameriva)/Transcript GHA-E0004 Local Expert Obuasi.docx
file:/Obuasi District (Binsere and Mameriva)/Transcript GHA-0007 Gatekeeper Mameriwa Obuasi.docx
33 changes: 33 additions & 0 deletions scraper/corpora/interviews/ghana/articlecorpus1.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
file:/Transcript GHA-0001 Gatekeeper Binsere.docx
file:/Transcript GHA-0002 Men FGD Community Mapping Binsere.docx
file:/Transcript GHA-0003 Women FGD Community Mapping Binsere.docx
file:/Transcript GHA-0005 Men FGD Pairwise Ranking Binsere.docx
file:/Transcript GHA-0006 Women FGD Pairwise Ranking Binsere.docx
file:/Transcript GHA-0007 Gatekeeper Mameriwa.docx
file:/Transcript GHA-0008 Men FGD Community Mapping Mameriwa.docx
file:/Transcript GHA-0009 Women FGD Community Mapping Mameriwa.docx
file:/Transcript GHA-0011 Men FGD Pairwise Ranking Mameriwa.docx
file:/Transcript GHA-0012 Women FGD Pairwise Ranking Mameriwa.docx
file:/Transcript GHA-0013 Gatekeeper Manso Adubia.docx
file:/Transcript GHA-0014 Men FGD Community Mapping Manso Abudia.docx
file:/Transcript GHA-0015 Community Resource Mapping Women FGD Manso Adubia.docx
file:/Transcript GHA-0017 Men FGD Pairwise Ranking Manso Adubia.docx
file:/Transcript GHA-0018 Women FGD Pairwise Ranking Manso Adubia.docx
file:/Transcript GHA-0019 Gatekeeper Essase Bontefufuo.docx
file:/Transcript GHA-0020 Men FGD Community Map Essase Bontefufuo.docx
file:/Transcript GHA-0021 Women FGD Community Mapping Essase Bontefufuo.docx
file:/Transcript GHA-0023 Men FGD Pairwise Ranking Essase Bontefufuo.docx
file:/Transcript GHA-0024 Women FGD Pairwise Ranking Essase Bontefufuo.docx
file:/Transcript GHA-0025 Gatekeeper Dompoase.docx
file:/Transcript GHA-0026 Men FGD Community Mapping Dompoase.docx
file:/Transcript GHA-0027 Women FGD Community Mapping Dompoase.docx
file:/Transcript GHA-0029 Men FGD Pairwise Dompoase.docx
file:/Transcript GHA-0030 Women FGD Pairwise Dompoase.docx
file:/Transcript GHA-E0001 National expert.docx
file:/Transcript GHA-E0002 National expert (Univ of Ghana professor).docx
file:/Transcript GHA-E0003 National expert (reporter).docx
file:/Transcript GHA-E0004 Local Expert Obuasi Municipal.docx
file:/Transcript GHA-E0008 Local Expert Adansi North.docx
file:/Transcript GHA-E0009 Local Experts Adansi North.docx
file:/Transcript GHA-E0010 Local Expert Kumasi Metro.docx
file:/Transcript GHA-E0011 Local Expert Kumasi Metro.docx
Original file line number Diff line number Diff line change
Expand Up @@ -7,8 +7,8 @@ import org.clulab.habitus.scraper.scrapers.article.CorpusArticleScraper

object ArticleScraperApp extends App {
val term = "interview"
val corpusFileName = args.lift(0).getOrElse(s"./scraper/corpora/interviews/uganda/articlecorpus.txt")
val baseDirName = args.lift(1).getOrElse("../corpora/interviews/uganda/articles")
val corpusFileName = args.lift(0).getOrElse(s"./scraper/corpora/interviews/ghana/articlecorpus.txt")
val baseDirName = args.lift(1).getOrElse("../corpora/interviews/ghana/articles")
val corpus = PageCorpus(corpusFileName)
val scraper = new CorpusArticleScraper(corpus)
val browser: Browser = new HabitusBrowser()
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ import scala.util.Using
object Step1OutputEidos extends App {
implicit val formats: DefaultFormats.type = org.json4s.DefaultFormats

val baseDirectoryName = args.lift(0).getOrElse("../corpora/interviews/uganda/articles")
val baseDirectoryName = args.lift(0).getOrElse("../corpora/interviews/ghana/articles")
val inAndOutFiles = new File(baseDirectoryName)
.listFilesByWildcard("*.json", recursive = true)
.map { inFile =>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -19,8 +19,8 @@ import scala.util.Using
object Step2InputEidos extends App with Logging {
implicit val formats: DefaultFormats.type = org.json4s.DefaultFormats
val contextWindow = 3
val baseDirectory = "../corpora/interviews/uganda/articles"
val outputFileName = "../corpora/interviews/uganda/uganda-interviews.tsv"
val baseDirectory = "../corpora/interviews/ghana/articles"
val outputFileName = "../corpora/interviews/ghana/ghana-interviews.tsv"
val deserializer = new JLDDeserializer()

def jsonFileToJsonld(jsonFile: File): File =
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -7,8 +7,8 @@ import org.clulab.utils.{FileUtils, Logging, Sourcer}
import scala.util.Using

object Step3InterpretDates extends App with Logging {
val inputFileName = "../corpora/interviews/uganda/uganda-interviews-a.tsv"
val outputFileName = "../corpora/interviews/uganda/uganda-interviews-b.tsv"
val inputFileName = "../corpora/interviews/ghana/ghana-interviews-a.tsv"
val outputFileName = "../corpora/interviews/ghana/ghana-interviews-b.tsv"
val expectedColumnCount = 22

Using.resource(Sourcer.sourceFromFilename(inputFileName)) { inputSource =>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -31,8 +31,8 @@ object Step4FindNearestLocation extends App with Logging {
val header = "prevLocation\tprevDistance\tnextLocation\tnextDistance"
}

val inputFileName = "../corpora/interviews/uganda/uganda-interviews-b.tsv"
val outputFileName = "../corpora/interviews/uganda/uganda-interviews-c.tsv"
val inputFileName = "../corpora/interviews/ghana/ghana-interviews-b.tsv"
val outputFileName = "../corpora/interviews/ghana/ghana-interviews-c.tsv"
val expectedColumnCount = 23
val tsvReader = new TsvReader()
var articleIndex = 0
Expand Down
Loading