Skip to content

Commit

Permalink
Improved formatting, implemented subsetting, runs soup to nuts
Browse files Browse the repository at this point in the history
  • Loading branch information
tt-ubuntu committed Oct 17, 2020
1 parent 81a81a2 commit 1f79afb
Show file tree
Hide file tree
Showing 2 changed files with 71 additions and 21 deletions.
83 changes: 62 additions & 21 deletions get-datasets.sh
Original file line number Diff line number Diff line change
Expand Up @@ -6,45 +6,86 @@ DB="prodigy.db"
DSLIST="dataset-names.txt"
EXPORT=false

#######################################
# Print a horizontal rule made of '=' characters, followed by a newline.
# Arguments: $1 - rule width in characters (optional, defaults to 30)
# Outputs:   the rule on stdout
#######################################
delimit() {
  local -r width="${1:-30}"
  local rule
  # Pad an empty string out to the requested width, then turn the padding
  # into '=' so no brace expansion or external tool is needed.
  printf -v rule '%*s' "${width}" ''
  printf '%s\n' "${rule// /=}"
}

#######################################
# Print the table of supported flags and terminate the script.
# Outputs: the usage table on stdout, followed by a delimiter rule
# Exits:   always, with status 1 (shared by -h and by invalid flags)
#######################################
print_usage() {
  local -r row_fmt=' %-6s %-20s %-15s %s\n'
  printf '%b' 'Usage:\n\n'
  # printf re-applies the format to each group of four arguments,
  # so every line below is one row: flag, argument, type, explanation.
  printf "${row_fmt}" \
    "flag" "argument" "type" "explanation" \
    "----" "--------" "----" "-----------" \
    "-d " "DATABASE" "filepath" "Database file to process" \
    "-l" "LISTFILE" "filepath" "File in which to save the list of datasets" \
    "-e" "EXPORT" "<none>" "Export the datasets to JSON files" \
    "-s" "SUBSET" "int" "# of datasets to return (rather than all available)" \
    "-c" "CONSOLIDATIONFILE" "filepath" "File in which to dump consolidated dataset exports" \
    "-h" "<none>" "<none>" "Display help" \
    "-r" "<none>" "<none>" "Print references used in developing this script"
  delimit
  exit 1
}

delimit

#######################################
# Parse the command-line flags into the script's global settings.
# Globals written:
#   h_flag/HELP, r_flag/REFERENCES, d_flag/DB, l_flag/DSLIST,
#   e_flag/EXPORT, s_flag/SUBSET, c_flag/CONS
# Arguments: the script's own arguments ("$@")
# Outputs:   one " >> label [value]" line on stdout for -d, -l, -e, -s, -c
# Exits:     through print_usage on an unknown flag or a missing argument
#######################################
parse_options() {
  local flag
  local -r echo_fmt=' >> %-20s %s\n'
  # -h, -r and -e are plain switches; -d, -l, -s and -c take an argument.
  while getopts 'hrd:l:es:c:' flag; do
    case "${flag}" in
      h)
        h_flag='true'
        HELP=1
        ;;
      r)
        r_flag='true'
        REFERENCES=1
        ;;
      d)
        d_flag='true'
        DB="${OPTARG}"
        printf "${echo_fmt}" "Database:" "[${DB}]"
        ;;
      l)
        l_flag='true'
        DSLIST="${OPTARG}"
        printf "${echo_fmt}" "List file:" "[${DSLIST}]"
        ;;
      e)
        e_flag='true'
        EXPORT=true
        printf "${echo_fmt}" "Exporting datasets?" "[${EXPORT}]"
        ;;
      s)
        s_flag='true'
        SUBSET="${OPTARG}"
        printf "${echo_fmt}" "Subsetting:" "[${SUBSET}] datasets"
        ;;
      c)
        c_flag='true'
        CONS="${OPTARG}"
        printf "${echo_fmt}" "Consolidating datasets into file:" "[${CONS}]"
        ;;
      # getopts reports unknown flags and missing arguments as '?'.
      *)
        print_usage
        ;;
    esac
  done
}

parse_options "$@"

# Show the usage table and stop when -h was given (print_usage exits).
if [[ -n "${HELP:-}" ]]; then
  print_usage
fi

#######################################
# Set SUBSET_CLAUSE, the tail of the dataset-name query.
# Globals read:    s_flag, SUBSET
# Globals written: SUBSET_CLAUSE
# Returns: 0 on success; 1 if -s was given with a non-integer value
#######################################
build_subset_clause() {
  if [[ "${s_flag:-}" == 'true' ]]; then
    # SUBSET is pasted into the SQL text, so accept digits only.
    if [[ ! "${SUBSET:-}" =~ ^[0-9]+$ ]]; then
      printf 'error: -s expects a non-negative integer, got [%s]\n' \
        "${SUBSET:-}" >&2
      return 1
    fi
    SUBSET_CLAUSE="ORDER BY name DESC LIMIT ${SUBSET};"
  else
    SUBSET_CLAUSE=";"
  fi
}

build_subset_clause || exit 1

delimit
# Build the dataset-name query. SUBSET_CLAUSE carries the terminating ';'
# and, when subsetting, the ORDER BY / LIMIT part.
QUERY="SELECT DISTINCT name FROM dataset ${SUBSET_CLAUSE}"
TASKFMT=" >> %-20s %s\n"
printf "${TASKFMT}" "Executing query:" "[${QUERY}]"
printf "${TASKFMT}" "...on database:" "[${DB}]"
printf "${TASKFMT}" "...exporting to:" "[${DSLIST}]"

delimit
# Quote both paths so names containing spaces survive word splitting.
# NOTE(review): '>>' appends, so names from earlier runs stay in the list
# file and are picked up again by the export step - confirm this is intended.
if ! sqlite3 "${DB}" "${QUERY}" >> "${DSLIST}"; then
  printf 'error: query failed on database [%s]\n' "${DB}" >&2
  exit 1
fi

# When -e was given, run `prodigy db-out <name> ./` once per line of the
# list file. The file is redirected straight into xargs (no cat) and its
# path is quoted so a list file with spaces in its name still works.
if [[ "${EXPORT}" == true ]]; then
  xargs -I {} prodigy db-out {} ./ < "${DSLIST}"
fi

# Print the acknowledgements list when -r was given.
# The operand is quoted inside [[ ]] so the test stays well-formed even if
# REFERENCES arrives from the environment containing whitespace.
if [[ -n "${REFERENCES:-}" ]]; then
  delimit
  # '%s' rather than '%b': the lines carry no escape sequences to interpret.
  printf '%s\n' \
    "This script was made possible by the many authors" \
    "who contributed to developing and improving the following resources:" \
    " - https://stackoverflow.com/questions/369758/how-to-trim-whitespace-from-a-bash-variable" \
    " - https://jeredsutton.com/post/bash-bestish-practices-part-4/" \
    " - https://dev.to/rpalo/advanced-argument-handling-in-bash-377b" \
    " - https://stackoverflow.com/a/10973280" \
    " - https://unix.stackexchange.com/questions/396223/bash-shell-script-output-alignment"
  delimit
fi

# TODO: Implement check for prodigy executable in local env
# TODO: Implement check for local prodigy.json; if not present, warn user (when verbose)
# TODO: Implement dry-run equivalent for whole script
# TODO: Make -r flag enabled on high-verbosity output
# TODO: Wrap output section above in verbosity catcher and also make each line conditional on the flag being triggered
9 changes: 9 additions & 0 deletions prodigy.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
{
"db": "sqlite",
"db_settings": {
"sqlite": {
"name": "kim-engage-2020-10-02.db",
"path": "../w210_capstone/teacherprints/prodigy/annotations/"
}
}
}

0 comments on commit 1f79afb

Please sign in to comment.