The Legacy Application

Story: WAV Snippet Publisher

As a content reseller, I want the WAV Snippet Publisher to extract a short snippet from each WAV file in a source directory and publish it as a new WAV file in a target directory.

The Waveform Audio (WAV) file standard derives from Microsoft and IBM and is based upon their Resource Interchange File Format (RIFF) (http://en.wikipedia.org/wiki/WAV). WAV files contain audio data encoded into a collection of samples, often using the commonly accepted Pulse-Code Modulation (PCM) standard. See http://en.wikipedia.org/wiki/Pulse-code_modulation. You can find a simplified version of the WAV format at https://ccrma.stanford.edu/courses/422/projects/WaveFormat/.

The snippet publisher meets the current but growing needs of the customer, though the implementation contains many limitations. For example, it doesn’t handle the rare case where the length of the audio samples is odd. Nor does it handle multiple-channel WAV files. It also does not support all platforms because of little/big endian differences. Our customer has asked us to resolve all of these limitations (leaving the rest to you for future incorporation).

Story: Add Support for Multiple Channels

Currently, the snippet publisher handles only single-channel (mono) WAV files properly. As a content reseller, I want to ensure that stereo WAV snippets do not get chopped in half.

Ahh! Change is here already! Unfortunately, almost no unit tests exist for the WAV Snippet Publisher, though that shouldn’t be surprising. (In preparing for this chapter, I built the codebase without employing TDD. It seemed quicker at first, but my lack of tests quickly chewed up more time than saved as I fell into a couple of defect weed patches.)

The open function represents the bulk of the WAV Snippet Publisher logic. It sprawls across the next few pages.

wav/1/WavReader.cpp

​void​ WavReader::open(​const​ std::​string​& name, ​bool​ trace) {

   rLog(channel, ​"opening %s"​, name.c_str());

   ifstream file{name, ios::in | ios::binary};

   ​if​ (!file.is_open()) {

      rLog(channel, ​"unable to read %s"​, name.c_str());

      ​return​;

}

   ofstream out{dest_ + ​"/"​ + name, ios::out | ios::binary};

   RiffHeader header;

   file.read(​reinterpret_cast​<​char​*>(&header), ​sizeof​(RiffHeader));

   ​if​ (toString(header.id, 4) != ​"RIFF"​) {

      rLog(channel, ​"ERROR: %s is not a RIFF file"​,

         name.c_str());

      ​return​;

}

   ​if​ (toString(header.format, 4) != ​"WAVE"​) {

      rLog(channel, ​"ERROR: %s is not a wav file: %s"​,

         name.c_str(),

         toString(header.format, 4).c_str());

      ​return​;

}

   out.write(​reinterpret_cast​<​char​*>(&header), ​sizeof​(RiffHeader));

   FormatSubchunkHeader formatSubchunkHeader;

   file.read(​reinterpret_cast​<​char​*>(&formatSubchunkHeader),

         ​sizeof​(FormatSubchunkHeader));

   ​if​ (toString(formatSubchunkHeader.id, 4) != ​"fmt "​) {

      rLog(channel, ​"ERROR: %s expecting 'fmt' for subchunk header; got '%s'"​,

         name.c_str(),

         toString(formatSubchunkHeader.id, 4).c_str());

      ​return​;

}

   out.write(​reinterpret_cast​<​char​*>(&formatSubchunkHeader),

         ​sizeof​(FormatSubchunkHeader));

   FormatSubchunk formatSubchunk;

   file.read(​reinterpret_cast​<​char​*>(&formatSubchunk), ​sizeof​(FormatSubchunk));

   out.write(​reinterpret_cast​<​char​*>(&formatSubchunk), ​sizeof​(FormatSubchunk));

   rLog(channel, ​"format tag: %u"​, formatSubchunk.formatTag); ​// show as hex?​

   rLog(channel, ​"samples per second: %u"​, formatSubchunk.samplesPerSecond);

   rLog(channel, ​"channels: %u"​, formatSubchunk.channels);

   rLog(channel, ​"bits per sample: %u"​, formatSubchunk.bitsPerSample);

   ​auto​ bytes = formatSubchunkHeader.subchunkSize - ​sizeof​(FormatSubchunk);

   ​auto​ additionalBytes = ​new​ ​char​[bytes];

   file.read(additionalBytes, bytes);

   out.write(additionalBytes, bytes);

   FactOrData factOrData;

   file.read(​reinterpret_cast​<​char​*>(&factOrData), ​sizeof​(FactOrData));

   out.write(​reinterpret_cast​<​char​*>(&factOrData), ​sizeof​(FactOrData));

   ​if​ (toString(factOrData.tag, 4) == ​"fact"​) {

      FactChunk factChunk;

      file.read(​reinterpret_cast​<​char​*>(&factChunk), ​sizeof​(FactChunk));

      out.write(​reinterpret_cast​<​char​*>(&factChunk), ​sizeof​(FactChunk));

      file.read(​reinterpret_cast​<​char​*>(&factOrData), ​sizeof​(FactOrData));

      out.write(​reinterpret_cast​<​char​*>(&factOrData), ​sizeof​(FactOrData));

      rLog(channel, ​"samples per channel: %u"​, factChunk.samplesPerChannel);

}

   ​if​ (toString(factOrData.tag, 4) != ​"data"​) {

      ​string​ tag{toString(factOrData.tag, 4)};

      rLog(channel, ​"%s ERROR: unknown tag>%s<"​, name.c_str(), tag.c_str());

      ​return​;

}

   DataChunk dataChunk;

   file.read(​reinterpret_cast​<​char​*>(&dataChunk), ​sizeof​(DataChunk));

   rLog(channel, ​"riff header size = %u"​ , ​sizeof​(RiffHeader));

   rLog(channel, ​"subchunk header size = %u"​, ​sizeof​(FormatSubchunkHeader));

   rLog(channel, ​"subchunk size = %u"​, formatSubchunkHeader.subchunkSize);

   rLog(channel, ​"data length = %u"​, dataChunk.length);

   ​// TODO if odd there is a padding byte!​

   ​auto​ data = ​new​ ​char​[dataChunk.length];

   file.read(data, dataChunk.length);

   file.close();

   ​// all of it​

​//   out.write(data, dataChunk.length);​

   ​// TODO: multiple channels​

*

   uint32_t secondsDesired{10};

*

   ​if​ (formatSubchunk.bitsPerSample == 0) formatSubchunk.bitsPerSample = 8;

*

   uint32_t bytesPerSample{formatSubchunk.bitsPerSample / uint32_t{8}};

*

   uint32_t samplesToWrite{secondsDesired * formatSubchunk.samplesPerSecond};

*

   uint32_t totalSamples{dataChunk.length / bytesPerSample};

*

*

   samplesToWrite = min(samplesToWrite, totalSamples);

*

*

   uint32_t totalSeconds{totalSamples / formatSubchunk.samplesPerSecond};

*

   rLog(channel, ​"total seconds %u "​, totalSeconds);

*

*

   dataChunk.length = samplesToWrite * bytesPerSample;

*

   out.write(​reinterpret_cast​<​char​*>(&dataChunk), ​sizeof​(DataChunk));

*

*

   uint32_t startingSample{

*

      totalSeconds >= 10 ? 10 * formatSubchunk.samplesPerSecond : 0};

*

   rLog(channel, ​"writing %u samples"​, samplesToWrite);

*

   ​for​ (​auto​ sample = startingSample;

*

        sample < startingSample + samplesToWrite;

*

        sample++) {

*

      ​auto​ byteOffsetForSample = sample * bytesPerSample;

*

      ​for​ (uint32_t byte{0}; byte < bytesPerSample; byte++)

*

         out.put(data[byteOffsetForSample + byte]);

*

}

   rLog(channel, ​"completed writing %s"​, name.c_str());

   descriptor_->add(dest_, name,

         totalSeconds, formatSubchunk.samplesPerSecond, formatSubchunk.channels);

   out.close();

}

The open function contains comments, to-dos, commented-out logic, questionable names, magic numbers, repetition, and a one-stop-shopping collection of code to solve all problems. What’s not to like?

Well, we don’t like much in this code. The convoluted code provides ample opportunities for us to mess up as we attempt to add support for multiple channels. It would help to have tests in the areas we want to change.

wav/1/WavReader.cpp
	void WavReader::open(const std::string& name, bool trace) {
	rLog(channel, "opening %s", name.c_str());

	ifstream file{name, ios::in \| ios::binary};
	if (!file.is_open()) {
	rLog(channel, "unable to read %s", name.c_str());
	return;
	}

	ofstream out{dest_ + "/" + name, ios::out \| ios::binary};

	RiffHeader header;
	file.read(reinterpret_cast<char>(&header), sizeof*(RiffHeader));

	if (toString(header.id, 4) != "RIFF") {
	rLog(channel, "ERROR: %s is not a RIFF file",
	name.c_str());
	return;
	}
	if (toString(header.format, 4) != "WAVE") {
	rLog(channel, "ERROR: %s is not a wav file: %s",
	name.c_str(),
	toString(header.format, 4).c_str());
	return;
	}
	out.write(reinterpret_cast<char>(&header), sizeof*(RiffHeader));

	FormatSubchunkHeader formatSubchunkHeader;
	file.read(reinterpret_cast<char*>(&formatSubchunkHeader),
	sizeof(FormatSubchunkHeader));

	if (toString(formatSubchunkHeader.id, 4) != "fmt ") {
	rLog(channel, "ERROR: %s expecting 'fmt' for subchunk header; got '%s'",
	name.c_str(),
	toString(formatSubchunkHeader.id, 4).c_str());
	return;
	}

	out.write(reinterpret_cast<char*>(&formatSubchunkHeader),
	sizeof(FormatSubchunkHeader));

	FormatSubchunk formatSubchunk;
	file.read(reinterpret_cast<char>(&formatSubchunk), sizeof*(FormatSubchunk));

	out.write(reinterpret_cast<char>(&formatSubchunk), sizeof*(FormatSubchunk));

	rLog(channel, "format tag: %u", formatSubchunk.formatTag); // show as hex?
	rLog(channel, "samples per second: %u", formatSubchunk.samplesPerSecond);
	rLog(channel, "channels: %u", formatSubchunk.channels);
	rLog(channel, "bits per sample: %u", formatSubchunk.bitsPerSample);

	auto bytes = formatSubchunkHeader.subchunkSize - sizeof(FormatSubchunk);

	auto additionalBytes = new char[bytes];
	file.read(additionalBytes, bytes);
	out.write(additionalBytes, bytes);

	FactOrData factOrData;
	file.read(reinterpret_cast<char>(&factOrData), sizeof*(FactOrData));
	out.write(reinterpret_cast<char>(&factOrData), sizeof*(FactOrData));

	if (toString(factOrData.tag, 4) == "fact") {
	FactChunk factChunk;
	file.read(reinterpret_cast<char>(&factChunk), sizeof*(FactChunk));
	out.write(reinterpret_cast<char>(&factChunk), sizeof*(FactChunk));

	file.read(reinterpret_cast<char>(&factOrData), sizeof*(FactOrData));
	out.write(reinterpret_cast<char>(&factOrData), sizeof*(FactOrData));

	rLog(channel, "samples per channel: %u", factChunk.samplesPerChannel);
	}

	if (toString(factOrData.tag, 4) != "data") {
	string tag{toString(factOrData.tag, 4)};
	rLog(channel, "%s ERROR: unknown tag>%s<", name.c_str(), tag.c_str());
	return;
	}

	DataChunk dataChunk;
	file.read(reinterpret_cast<char>(&dataChunk), sizeof*(DataChunk));

	rLog(channel, "riff header size = %u" , sizeof(RiffHeader));
	rLog(channel, "subchunk header size = %u", sizeof(FormatSubchunkHeader));
	rLog(channel, "subchunk size = %u", formatSubchunkHeader.subchunkSize);
	rLog(channel, "data length = %u", dataChunk.length);

	// TODO if odd there is a padding byte!
	auto data = new char[dataChunk.length];
	file.read(data, dataChunk.length);
	file.close();

	// all of it
	// out.write(data, dataChunk.length);
	// TODO: multiple channels
*	uint32_t secondsDesired{10};
*	if (formatSubchunk.bitsPerSample == 0) formatSubchunk.bitsPerSample = 8;
*	uint32_t bytesPerSample{formatSubchunk.bitsPerSample / uint32_t{8}};
*	uint32_t samplesToWrite{secondsDesired * formatSubchunk.samplesPerSecond};
*	uint32_t totalSamples{dataChunk.length / bytesPerSample};
*
*	samplesToWrite = min(samplesToWrite, totalSamples);
*
*	uint32_t totalSeconds{totalSamples / formatSubchunk.samplesPerSecond};
*	rLog(channel, "total seconds %u ", totalSeconds);
*
*	dataChunk.length = samplesToWrite * bytesPerSample;
*	out.write(reinterpret_cast<char>(&dataChunk), sizeof*(DataChunk));
*
*	uint32_t startingSample{
*	totalSeconds >= 10 ? 10 * formatSubchunk.samplesPerSecond : 0};
*	rLog(channel, "writing %u samples", samplesToWrite);
*	for (auto sample = startingSample;
*	sample < startingSample + samplesToWrite;
*	sample++) {
*	auto byteOffsetForSample = sample * bytesPerSample;
*	for (uint32_t byte{0}; byte < bytesPerSample; byte++)
*	out.put(data[byteOffsetForSample + byte]);
*	}
	rLog(channel, "completed writing %s", name.c_str());
	descriptor_->add(dest_, name,
	totalSeconds, formatSubchunk.samplesPerSecond, formatSubchunk.channels);
	out.close();
	}

Table of Contents for The Legacy Application

Create new playlist

Sign In

Sign Up

The Legacy Application

Table of Contents for
The Legacy Application