Skip to content

Commit ca10a23

Browse files
josenavas authored and antgonza committed
Adding to ascii file (#7)
1 parent c8419a1 commit ca10a23

File tree

2 files changed

+92
-1
lines changed

2 files changed

+92
-1
lines changed

qiita_files/demux.py

Lines changed: 37 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -461,6 +461,43 @@ def to_per_sample_ascii(demux, samples=None):
461461
yield samp, to_ascii(demux, samples=[samp])
462462

463463

464+
def to_ascii_file(demux_fp, output_fp, samples=None, out_format='fastq'):
    """Write the sequences of a demux file in FASTQ or FASTA format

    Parameters
    ----------
    demux_fp : str
        Path to the demux file to read from
    output_fp : str
        Path of the file to write; it is opened in binary write mode
    samples : list of str, optional
        Samples to pull out. If None, every key found in the demux file is
        used. Defaults to None.
    out_format : {'fastq', 'fasta'}, optional
        Format of the records written to `output_fp`. Default: 'fastq'

    Raises
    ------
    ValueError
        If `out_format` is not 'fastq' or 'fasta'
    """
    # Reject unknown formats up front, before any file gets opened
    if out_format not in ('fastq', 'fasta'):
        raise ValueError("'out_format' should be either 'fastq' or 'fasta', "
                         "found: %s" % out_format)

    if out_format == 'fastq':
        record_formatter = format_fastq_record
    else:
        record_formatter = format_fasta_record

    with open_file(demux_fp, 'r') as demux:
        sample_ids = list(demux.keys()) if samples is None else samples
        # Sample names are encoded to bytes before being handed to _to_ascii
        encoded_ids = [sample_id.encode() for sample_id in sample_ids]
        with open(output_fp, 'wb') as out:
            # Records are streamed to disk one by one as they are generated
            out.writelines(_to_ascii(demux, encoded_ids, record_formatter))
499+
500+
464501
def _to_file(demux_fp, sample, fp, formatter):
465502
with open_file(demux_fp, 'r') as demux:
466503
with open(fp, 'wb') as out:

qiita_files/tests/test_demux.py

Lines changed: 55 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,7 @@
2222
_per_sample_lengths, _summarize_lengths,
2323
_set_attr_stats, _construct_datasets, to_hdf5,
2424
to_ascii, stat, to_per_sample_ascii,
25-
to_per_sample_files)
25+
to_per_sample_files, to_ascii_file)
2626

2727

2828
class BufferTests(TestCase):
@@ -328,6 +328,60 @@ def test_to_per_sample_ascii(self):
328328
obs = [(s[0], list(s[1])) for s in to_per_sample_ascii(self.hdf5_file)]
329329
self.assertEqual(obs, exp)
330330

331+
def test_to_ascii_file(self):
    # Builds a demux file from a variable-length FASTQ fixture and checks
    # the bytes written by to_ascii_file for: the default FASTQ output,
    # FASTA output, and FASTQ output restricted to a single sample.

    def scratch_file(suffix, *chunks):
        # Create a persistent (delete=False) temporary file, write any
        # given chunks to it and record its path in self.to_remove
        # (presumably cleaned up by the test case's tearDown)
        with tempfile.NamedTemporaryFile('r+', suffix=suffix,
                                         delete=False) as handle:
            for chunk in chunks:
                handle.write(chunk)
        self.to_remove.append(handle.name)
        return handle.name

    def read_bytes(path):
        # Return the raw contents of the file at `path`
        with open(path, 'rb') as handle:
            return handle.read()

    fastq_fp = scratch_file('.fq', fqdata_variable_length)
    demux_fp = scratch_file('.demux')

    with h5py.File(demux_fp, 'r+') as demux:
        to_hdf5(fastq_fp, demux)

    # Default arguments: every sample, FASTQ records
    all_fastq_fp = scratch_file('.fq')
    to_ascii_file(demux_fp, all_fastq_fp)
    obs = read_bytes(all_fastq_fp)
    exp = (b'@a_0 orig_bc=abc new_bc=abc bc_diffs=0\nxyz\n+\nABC\n'
           b'@b_0 orig_bc=abw new_bc=wbc bc_diffs=4\nqwe\n+\nDFG\n'
           b'@b_1 orig_bc=abw new_bc=wbc bc_diffs=4\nqwexx\n+\nDEF#G\n')
    self.assertEqual(obs, exp)

    # Every sample, FASTA records
    all_fasta_fp = scratch_file('.fa')
    to_ascii_file(demux_fp, all_fasta_fp, out_format='fasta')
    obs = read_bytes(all_fasta_fp)
    exp = (b'>a_0 orig_bc=abc new_bc=abc bc_diffs=0\nxyz\n'
           b'>b_0 orig_bc=abw new_bc=wbc bc_diffs=4\nqwe\n'
           b'>b_1 orig_bc=abw new_bc=wbc bc_diffs=4\nqwexx\n')
    self.assertEqual(obs, exp)

    # Only sample 'b', FASTQ records
    single_fastq_fp = scratch_file('.fq')
    to_ascii_file(demux_fp, single_fastq_fp, samples=['b'])
    obs = read_bytes(single_fastq_fp)
    exp = (b'@b_0 orig_bc=abw new_bc=wbc bc_diffs=4\nqwe\n+\nDFG\n'
           b'@b_1 orig_bc=abw new_bc=wbc bc_diffs=4\nqwexx\n+\nDEF#G\n')
    self.assertEqual(obs, exp)
384+
331385
def test_to_files(self):
332386
# implicitly tested with test_to_per_sample_fasta
333387
pass

0 commit comments

Comments
 (0)