Skip to content

Commit

Permalink
Merge pull request #120 from bwlang/dev
Browse files Browse the repository at this point in the history
Fixed segfault with mappability filtering, minor fixes.
  • Loading branch information
dpryan79 authored Jul 22, 2021
2 parents cb5ffd1 + 62a0b98 commit 74f52da
Show file tree
Hide file tree
Showing 4 changed files with 68 additions and 14 deletions.
2 changes: 2 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -6,3 +6,5 @@ htslib/*
libMethylDackel.a
MethylDackel
version.h
MethylDackel.dSYM/*
.DS_Store
2 changes: 1 addition & 1 deletion BBM_Specification.md
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ BBM is a binary format for storing a single data track for a genome. Its main re

## Format Overview:

([bracketed] items will be explained below, <angle bracketed> items are simply markers of the beginning and end of the file, and items in {curly braces} contain other items and are explained in their own sections)
(`[bracketed]` items will be explained below, `<angle bracketed>` items are simply markers of the beginning and end of the file, and items in `{curly braces}` contain other items and are explained in their own sections)

```<beginning of file>[version][chromCount]{chrom record}{chrom record}{chrom record}{chrom record}...<end of file>```

Expand Down
32 changes: 27 additions & 5 deletions common.c
Original file line number Diff line number Diff line change
Expand Up @@ -224,12 +224,34 @@ unsigned char* getMappabilityValue(Config* config, char* chrom_n, uint32_t start
unsigned char* data = malloc((end-start)*sizeof(unsigned char)); //allocate array for data
int index = start/8;
int offset = start%8;
//int startindex = start/8; //debug
//int startoffset = start%8; //debug

//debug
int arrlen = 0; //variable to store the length of the array used for the data (this is not the same as the chromosome length, as each value is one bit and this is an array of characters, i.e. bytes)
if(chromFound)
{
arrlen = config->chromLengths[chrom]/8; //array length is chromosome length over 8 (number of bits to number of bytes)
if(config->chromLengths[chrom]%8 > 0) //if there is a remainder that didn't divide evenly
{
arrlen++; //add an extra byte to store it
}
}

for(i = 0; i<end-start; i++)
{
unsigned char byte;
if(chromFound) //was a chrom ID found for chrom_n, or is chrom still -1 (or here 4,294,967,295) i.e. chrom not found
{
byte = config->bw_data[chrom][index];
if(index>=arrlen)
{
byte = 0;
//printf("warning: accessing at index %d offset %d, arrlen is %d!\ni is %d, end-start is %d, end pos should be %d!\nstartindex is %d, startoffset is %d, start is %d, end is %d\nchrom len is %d\n", index, offset, arrlen, i, end-start, (start/8)+((end-start)/8), startindex, startoffset, start, end, config->chromLengths[chrom]);
}
else
{
byte = config->bw_data[chrom][index];
}
}
else
{
Expand Down Expand Up @@ -278,9 +300,9 @@ char check_mappability(void *data, bam1_t *b) {
read1_start = b->core.mpos;
read1_end = b->core.mpos + b->core.l_qseq; //assuming both reads same length to avoid issues finding read2_end
}
vals = getMappabilityValue(ldata->config, ldata->hdr->target_name[b->core.tid], read1_start, read1_end+1);
vals = getMappabilityValue(ldata->config, ldata->hdr->target_name[b->core.tid], read1_start, read1_end);

for (i=0; i<=read1_end-read1_start; i++)
for (i=0; i<read1_end-read1_start; i++)
{
if(vals[i] > 0) //is base above threshold?
{
Expand All @@ -293,10 +315,10 @@ char check_mappability(void *data, bam1_t *b) {
}
}
free(vals);
vals = getMappabilityValue(ldata->config, ldata->hdr->target_name[b->core.tid], read2_start, read2_end+1);
vals = getMappabilityValue(ldata->config, ldata->hdr->target_name[b->core.tid], read2_start, read2_end);

num_mappable_bases = 0;
for (i=0; i<=read2_end-read2_start; i++)
for (i=0; i<read2_end-read2_start; i++)
{
if(vals[i] > 0) //is base above threshold?
{
Expand Down
46 changes: 38 additions & 8 deletions extract.c
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,9 @@
#include <pthread.h>
#include "MethylDackel.h"

int RUNOFFSET = 99; //used to calculate the run length value to store in a BBM file, as value-to-store = actual-run-length + RUNOFFSET. Placed up here as it's a constant.
#define RUNOFFSET 99 //used to calculate the run length value to store in a BBM file, as value-to-store = actual-run-length + RUNOFFSET. Placed up here as it's a constant.
#define BBM_VERSION 1 //the version of the BBM file format read/written here


void print_version(void);

Expand Down Expand Up @@ -590,11 +592,11 @@ void extract_usage() {
" -b, --minMappableBases INT If a bigWig file is provided, this sets the\n"
" number of mappable bases needed for a read to be considered\n"
" mappable (default 15).\n"
" -O, --outputBBFile If this is specified, a Binary Bismap (.bbm) file will\n"
" -O, --outputBBMFile If this is specified, a Binary Bismap (.bbm) file will\n"
" be written with the same base name as the provided bigWig file,\n"
" but with the .bbm extension. Neither this option nor -N have any\n"
" effect if a bigWig is not specified with -M.\n"
" -N, --outputBBFileName FILE If this is specified, a Binary Bismap (.bbm) file will\n"
" -N, --outputBBMFileName FILE If this is specified, a Binary Bismap (.bbm) file will\n"
" be written at the provided filename. Neither this option nor -O\n"
" have any effect if a bigWig is not specified with -M.\n"
" -B, --mappabilityBB FILE A .bbm file containing mappability data for\n"
Expand Down Expand Up @@ -925,18 +927,42 @@ int extract_main(int argc, char *argv[]) {
if(config.outputBB && (!config.outBBMName && config.BWName))
{
char* tmp = strdup(config.BWName);
int fullLen = strlen(config.BWName); //full length
char* p = strrchr(tmp, '.');
if(p != NULL) *p = '\0';
int nameLen = strlen(tmp); //length of string not including extension
if(nameLen+4 > fullLen) //if new name will be bigger than old name
{
char* tmp2 = realloc(tmp, nameLen+5); //expand str

if(tmp2 == NULL) //if realloc failed, manually copy over
{
char* tmp2 = malloc(nameLen+5);
strcpy(tmp2, tmp);
free(tmp);
}

tmp = tmp2; //assign tmp1 to point to new str

}
config.outBBMName = strcat(tmp, ".bbm");
}

if(config.outputBB && !config.BWName)
{
fprintf(stderr, "You must specify a bigWig file when attempting to create a BBM file!\n");
extract_usage();
return -1;
}

if(argc == 1) {
extract_usage();
return 0;
}
if(argc-optind < 2) {
if(config.outputBB)
{
config.noBAM = 1; //just write BBM, don't extract
config.noBAM = 1; //just write BBM, don't extract
}
else
{
Expand Down Expand Up @@ -987,8 +1013,11 @@ int extract_main(int argc, char *argv[]) {
}

//Open the files
config.FastaName = argv[optind];
config.BAMName = argv[optind+1];
if(!config.noBAM)
{
config.FastaName = argv[optind];
config.BAMName = argv[optind+1];
}
if(!config.noBAM)
{
if((config.fp = hts_open(argv[optind+1], "rb")) == NULL) {
Expand Down Expand Up @@ -1037,7 +1066,7 @@ int extract_main(int argc, char *argv[]) {
fprintf(stderr, "Couldn't open %s for writing! Insufficient permissions?\n", config.outBBMName);
return -7;
}
unsigned char bbm_version = 1;
unsigned char bbm_version = BBM_VERSION;
fwrite(&bbm_version, sizeof(char), 1, f); //write version
}
config.bw_data = malloc(config.BW_ptr->cl->nKeys*sizeof(char*)); //init outer array
Expand Down Expand Up @@ -1175,6 +1204,7 @@ int extract_main(int argc, char *argv[]) {
free(config.chromNames);
free(config.bw_data);
bwClose(config.BW_ptr);
free(config.outBBMName);
return 0;
}
}
Expand All @@ -1189,7 +1219,7 @@ int extract_main(int argc, char *argv[]) {
char readlen; //used to store the length of data read from the file. Could be used in the future to detect unexpected EOF throughout reading the file, but it is currently only used to check for blank files and incorrect chrom name null terminators
unsigned char bbm_version;
readlen = fread(&bbm_version, sizeof(char), 1, config.BBM_ptr); //get BBM version
if(bbm_version != 1)
if(bbm_version != BBM_VERSION)
{
fprintf(stderr, "fatal: %s has wrong BBM version or is malformed\n", config.BBMName);
return -10;
Expand Down

0 comments on commit 74f52da

Please sign in to comment.