Skip to content

Commit

Permalink
v1.7.5.3
Browse files Browse the repository at this point in the history
  • Loading branch information
Kinggerm committed Jan 20, 2022
1 parent 55113dc commit caa29f1
Show file tree
Hide file tree
Showing 4 changed files with 54 additions and 39 deletions.
2 changes: 1 addition & 1 deletion GetOrganelleLib/pipe_control_func.py
Original file line number Diff line number Diff line change
Expand Up @@ -875,7 +875,7 @@ def detect_spades_version(which_spades):
output, err = subprocess.Popen(
os.path.join(which_spades, "spades.py") + " -v", stdout=subprocess.PIPE,
stderr=subprocess.STDOUT, shell=True).communicate()
return output.decode("utf8").replace("v", "").strip()
return output.decode("utf8").replace("v", "").replace("genome assembler ", "").strip()
else:
return "SPAdes N/A"

Expand Down
12 changes: 11 additions & 1 deletion GetOrganelleLib/versions.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,14 +5,24 @@ def get_versions():


versions = [
{
"number": "1.7.5.3",
"features": [
"1. get_organelle_from_reads.py: fix a bug when no qualified reads found (issue 123)",
"2. get_organelle_from_reads.py: pass --spades-options to pre-assembly for Mac M1 (issue 127)",
"3. Update README.md: input read qc; issues->discussions",
],
"time": "2022-01-19 21:00 UTC-5"
},
{
"number": "1.7.5.2",
"features": [
"1. ask the questions publicly",
"2. statistical_func.py: weighted_gmm_with_em_aic(): fix a bug that will be triggered by "
" graph produced by join_spades_fastg_by_blast.py (reported by Mergi Dinka); "
" also fix a hidden mis-indexing issue there",
"3. Utilities/join_spades_fastg_by_blast.py: update on a v name issue",
"3. Utilities/join_spades_fastg_by_blast.py: update on a v name issue "
" (issues 119)",
],
"time": "2021-12-15 02:35 UTC-5"
},
Expand Down
8 changes: 4 additions & 4 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -82,10 +82,10 @@ Find more real data examples at [GetOrganelle/wiki/Examples](https://github.com/

## Instruction

<b>Find more organelle genome assembly instruction at [GetOrganelle/wiki](https://github.com/Kinggerm/GetOrganelle/wiki).</b>
<b>Find more organelle genome assembly instructions at [GetOrganelle/wiki](https://github.com/Kinggerm/GetOrganelle/wiki). </b>

<b>In most cases, all you actually need to do is type in one simple command as suggested in <a href="#recipes">Recipes</a>.
But you are still highly recommended to read the following minimal introductions</b>:
But you are still strongly encouraged to read the following minimal introductions</b>:

### Starting from Reads

Expand Down Expand Up @@ -234,6 +234,6 @@ Although older versions like 1.6.3/1.7.1/1.7.5 may be more stable, but we always

* [GoogleGroups](https://groups.google.com/g/getorganelle)

* QQ group (ID: 908302723)
* QQ group (ID: 908302723): only for mutual help, and we will no longer reply to questions there

* Emails: Do NOT directly write to us with your questions, post the questions **publicly**, using above platforms (we will be informed automatically) or any other platforms (inform us of it). Our emails ([email protected], [email protected]) are only for receiving public question alert and private data (if applied) associated with those public questions. When you send your private data to us, enclose the email with a link where you posted the question. Our only reply emails will be a receiving confirmation, while our answers will be posted in a public place.
**Do NOT** directly write to us with your questions; instead, please post the questions **publicly**, using the above platforms (we will be informed automatically) or any other platforms (inform us of it). Our emails ([email protected], [email protected]) are only for receiving alerts about public questions and private data (if applicable) associated with those public questions. When you send your private data to us, include in the email a link to where you posted the question. Our only email reply will be a confirmation of receipt, while our answers will be posted in a public place.
71 changes: 38 additions & 33 deletions get_organelle_from_reads.py
Original file line number Diff line number Diff line change
Expand Up @@ -301,8 +301,8 @@ def get_options(description, version):
"would be automatically discarded by GetOrganelle. "
"Default: %(default)s")
group_assembly.add_argument("--spades-options", dest="other_spades_options", default="",
help="Other SPAdes options. Use double quotation marks to include all the arguments "
"and parameters.")
help="Other SPAdes options. Use double quotation marks to include all "
"the arguments and parameters.")
group_assembly.add_argument("--no-spades", dest="run_spades", action="store_false", default=True,
help="Disable SPAdes.")
group_assembly.add_argument("--ignore-k", dest="ignore_kmer_res", default=40, type=int,
Expand Down Expand Up @@ -1008,7 +1008,8 @@ def _check_default_db(this_sub_organelle, extra_type=""):
def estimate_maximum_n_reads_using_mapping(
twice_max_coverage, check_dir, original_fq_list, reads_paired,
maximum_n_reads_hard_bound, seed_files, organelle_types, in_customs, ex_customs, target_genome_sizes,
keep_temp, resume, which_blast, which_spades, which_bowtie2, threads, random_seed, verbose_log, log_handler):
keep_temp, resume, other_spades_opts,
which_blast, which_spades, which_bowtie2, threads, random_seed, verbose_log, log_handler):
from GetOrganelleLib.sam_parser import MapRecords, get_cover_range
if executable(os.path.join(UTILITY_PATH, "slim_graph.py -h")):
which_slim = UTILITY_PATH
Expand Down Expand Up @@ -1142,6 +1143,7 @@ def estimate_maximum_n_reads_using_mapping(
original_fq_files=check_fq_files, mapped_fq_file=mapped_fq, seed_fs_file=seed_f,
mean_read_len=mean_read_len, organelle_type=organelle_type,
in_custom=this_in, ex_custom=this_ex, threads=threads, resume=resume,
other_spades_opts=other_spades_opts,
which_spades=which_spades, which_slim=which_slim, which_blast=which_blast,
log_handler=log_handler if verbose_log else None, verbose_log=verbose_log)
except NotImplementedError:
Expand Down Expand Up @@ -1301,7 +1303,7 @@ def extend_with_constant_words(baits_pool, raw_fq_files, word_size, output, jump

def pre_assembly_mapped_reads_for_base_cov(
original_fq_files, mapped_fq_file, seed_fs_file, mean_read_len, organelle_type, in_custom, ex_custom,
threads, resume, which_spades, which_slim, which_blast,
threads, resume, other_spades_opts, which_spades, which_slim, which_blast,
log_handler=None, verbose_log=False, keep_temp=False):
from GetOrganelleLib.assembly_parser import get_graph_coverages_range_simple
draft_kmer = min(45, int(mean_read_len / 2) * 2 - 3)
Expand Down Expand Up @@ -1342,7 +1344,7 @@ def pre_assembly_mapped_reads_for_base_cov(
try:
# log_handler.info(" ...")
this_command = os.path.join(which_spades, "spades.py") + " -t " + str(threads) + \
" -s " + mapped_fq_file + \
" -s " + mapped_fq_file + " " + other_spades_opts + \
" -k " + str(draft_kmer) + " --only-assembler -o " + this_modified_dir
pre_assembly = subprocess.Popen(this_command, stdout=subprocess.PIPE, stderr=subprocess.PIPE, shell=True)
if verbose_log and log_handler:
Expand Down Expand Up @@ -1397,7 +1399,8 @@ def pre_assembly_mapped_reads_for_base_cov(
extend_with_constant_words(
theses_words, original_fq_files, word_size=gathering_word_size, output=more_fq_file)
more_command = os.path.join(which_spades, "spades.py") + " -t " + str(threads) + " -s " + \
more_fq_file + " -k " + str(draft_kmer) + " --only-assembler -o " + this_modified_dir
more_fq_file + " " + other_spades_opts + " -k " + str(draft_kmer) + \
" --only-assembler -o " + this_modified_dir
pre_assembly = subprocess.Popen(
more_command, stdout=subprocess.PIPE, stderr=subprocess.STDOUT, shell=True)
if verbose_log and log_handler:
Expand Down Expand Up @@ -1449,7 +1452,7 @@ def check_parameters(word_size, original_fq_files, seed_fs_files, seed_fq_files,
organelle_types, in_custom_list, ex_custom_list, mean_error_rate, target_genome_sizes,
max_extending_len, mean_read_len, max_read_len, low_quality_pattern,
all_read_nums, reduce_reads_for_cov,
log_handler, which_spades, which_blast, which_bowtie2,
log_handler, other_spades_opts, which_spades, which_blast, which_bowtie2,
wc_bc_ratio_constant=0.35, larger_auto_ws=False,
threads=1, resume=False, random_seed=12345, verbose_log=False, zip_files=False):
from GetOrganelleLib.sam_parser import MapRecords, get_cover_range, mapping_gap_info_from_coverage_dict
Expand Down Expand Up @@ -1517,6 +1520,7 @@ def check_parameters(word_size, original_fq_files, seed_fs_files, seed_fq_files,
# TODO check in_customs lengths
organelle_type=organelle_types[go_t], in_custom=this_in, ex_custom=this_ex,
threads=threads, resume=resume, log_handler=log_handler, verbose_log=verbose_log,
other_spades_opts=other_spades_opts,
which_spades=which_spades, which_slim=which_slim, which_blast=which_blast)
except NotImplementedError:
if max_extending_len[go_t] == -1:
Expand Down Expand Up @@ -1819,7 +1823,7 @@ def make_read_index(original_fq_files, direction_according_to_user_input, all_re
else:
if not index_in_memory:
temp1_contig_out = open(temp1_contig_dir[0], 'w')
lengths = []
# lengths = []
use_user_direction = False
for id_file, file_name in enumerate(original_fq_files):
file_in = open(file_name, "r")
Expand Down Expand Up @@ -1889,14 +1893,14 @@ def make_read_index(original_fq_files, direction_according_to_user_input, all_re
if cancel_seq_parts and len(this_seq) > 1:
cancel_seq_parts = False
this_c_seq = complementary_seqs(this_seq)
lengths.extend([len(seq_part) for seq_part in this_seq])
# lengths.extend([len(seq_part) for seq_part in this_seq])
else:
this_seq = this_seq[0]
this_c_seq = complementary_seq(this_seq)
lengths.append(len(this_seq))
# lengths.append(len(this_seq))
else:
this_c_seq = complementary_seq(this_seq)
lengths.append(len(this_seq))
# lengths.append(len(this_seq))
if rm_duplicates:
if this_seq in seq_duplicates:
line_clusters[seq_duplicates[this_seq]].append(line_count)
Expand Down Expand Up @@ -1963,14 +1967,14 @@ def make_read_index(original_fq_files, direction_according_to_user_input, all_re
if cancel_seq_parts and len(this_seq) > 1:
cancel_seq_parts = False
this_c_seq = complementary_seqs(this_seq)
lengths.extend([len(seq_part) for seq_part in this_seq])
# lengths.extend([len(seq_part) for seq_part in this_seq])
else:
this_seq = this_seq[0]
this_c_seq = complementary_seq(this_seq)
lengths.append(len(this_seq))
# lengths.append(len(this_seq))
else:
this_c_seq = complementary_seq(this_seq)
lengths.append(len(this_seq))
# lengths.append(len(this_seq))
if rm_duplicates:
if this_seq in seq_duplicates:
line_clusters[seq_duplicates[this_seq]].append(line_count)
Expand Down Expand Up @@ -2042,14 +2046,12 @@ def make_read_index(original_fq_files, direction_according_to_user_input, all_re
if rm_duplicates:
if len_indices == 0 and line_count // 4 > 0:
log_handler.error("No qualified reads found!")
max_read_len = max(lengths)
if max_read_len < word_size:
log_handler.error("Word size (" + str(word_size) + ") CANNOT be larger than your "
"post-trimmed maximum read length (" + str(max_read_len) + ")!")
log_handler.error("Word size (" + str(word_size) + ") CANNOT be larger than your "
"post-trimmed maximum read length!")
exit()
log_handler.info(memory_usage + str(len_indices) + " candidates in all " + str(line_count // 4) + " reads")
else:
del lengths
# del lengths
log_handler.info(memory_usage + str(len_indices) + " reads")
if keep_seq_parts and cancel_seq_parts:
keep_seq_parts = False
Expand Down Expand Up @@ -3788,18 +3790,18 @@ def main():
original_fq_files = [fastq_file for fastq_file in options.unpaired_fq_files]
direction_according_to_user_input = [1] * len(options.unpaired_fq_files)
all_read_nums = [options.maximum_n_reads for foo in original_fq_files]
other_options = options.other_spades_options.split(' ')
if '-o' in other_options:
which_out = other_options.index('-o')
spades_output = other_options[which_out + 1]
del other_options[which_out: which_out + 2]
other_spd_options = options.other_spades_options.split(' ')
if '-o' in other_spd_options:
which_out = other_spd_options.index('-o')
spades_output = other_spd_options[which_out + 1]
del other_spd_options[which_out: which_out + 2]
else:
spades_output = os.path.join(out_base, options.prefix + "extended_spades")
if "--phred-offset" in other_options:
if "--phred-offset" in other_spd_options:
log_handler.warning("--spades-options '--phred-offset' was deprecated in GetOrganelle. ")
which_po = other_options.index("--phred-offset")
del other_options[which_po: which_po + 2]
other_options = ' '.join(other_options)
which_po = other_spd_options.index("--phred-offset")
del other_spd_options[which_po: which_po + 2]
other_spd_options = ' '.join(other_spd_options)

""" get reads """
extended_files_exist = max(
Expand Down Expand Up @@ -3876,6 +3878,7 @@ def main():
in_customs=options.genes_fasta, ex_customs=options.exclude_genes,
target_genome_sizes=options.target_genome_size,
keep_temp=options.keep_temp_files, resume=options.script_resume,
other_spades_opts=other_spd_options,
which_blast=options.which_blast, which_spades=options.which_spades,
which_bowtie2=options.which_bowtie2, threads=options.threads,
random_seed=options.random_seed, verbose_log=options.verbose_log, log_handler=log_handler)
Expand Down Expand Up @@ -3969,7 +3972,9 @@ def main():
max_extending_len=options.max_extending_len, mean_read_len=mean_read_len,
max_read_len=max_read_len, low_quality_pattern=low_quality_pattern,
all_read_nums=all_read_nums, reduce_reads_for_cov=options.reduce_reads_for_cov,
log_handler=log_handler, which_spades=options.which_spades,
log_handler=log_handler,
other_spades_opts=other_spd_options,
which_spades=options.which_spades,
which_blast=options.which_blast, which_bowtie2=options.which_bowtie2,
wc_bc_ratio_constant=0.35, larger_auto_ws=options.larger_auto_ws,
threads=options.threads, random_seed=options.random_seed,
Expand Down Expand Up @@ -4082,11 +4087,11 @@ def main():
log_handler.info("Assembling using SPAdes ...")
if not executable("pigz -h"):
log_handler.warning("Compression after read correction will be skipped for lack of 'pigz'")
if "--disable-gzip-output" not in other_options:
other_options += " --disable-gzip-output"
if "--disable-gzip-output" not in other_spd_options:
other_spd_options += " --disable-gzip-output"
if phred_offset in (33, 64):
other_options += " --phred-offset %i" % phred_offset
is_assembled = assembly_with_spades(options.spades_kmer, spades_output, other_options, out_base,
other_spd_options += " --phred-offset %i" % phred_offset
is_assembled = assembly_with_spades(options.spades_kmer, spades_output, other_spd_options, out_base,
options.prefix, original_fq_files, reads_paired,
which_spades=options.which_spades, verbose_log=options.verbose_log,
resume=resume, threads=options.threads, log_handler=log_handler)
Expand Down

0 comments on commit caa29f1

Please sign in to comment.