mirror of
https://github.com/Ardour/ardour.git
synced 2025-12-06 14:54:56 +01:00
Use soundtouch for vocal audio time stretching (1/2)
This commit is contained in:
parent
fb2d33c6a3
commit
262281bc1f
5 changed files with 227 additions and 100 deletions
|
|
@ -27,7 +27,6 @@
|
|||
#include "ardour/filter.h"
|
||||
#include "ardour/timefx_request.h"
|
||||
|
||||
#ifdef USE_RUBBERBAND
|
||||
|
||||
#include "ardour/rb_effect.h"
|
||||
|
||||
|
|
@ -41,8 +40,7 @@ class LIBARDOUR_API RBStretch : public RBEffect {
|
|||
|
||||
} /* namespace */
|
||||
|
||||
#else
|
||||
|
||||
#ifdef HAVE_SOUNDTOUCH
|
||||
#include <soundtouch/SoundTouch.h>
|
||||
|
||||
namespace ARDOUR {
|
||||
|
|
@ -52,16 +50,14 @@ class LIBARDOUR_API STStretch : public Filter {
|
|||
STStretch (ARDOUR::Session&, TimeFXRequest&);
|
||||
~STStretch ();
|
||||
|
||||
int run (boost::shared_ptr<ARDOUR::Region>);
|
||||
int run (boost::shared_ptr<ARDOUR::Region>, Progress* progress = 0);
|
||||
|
||||
private:
|
||||
TimeFXRequest& tsr;
|
||||
|
||||
soundtouch::SoundTouch st;
|
||||
};
|
||||
|
||||
} /* namespace */
|
||||
|
||||
#endif
|
||||
|
||||
|
||||
#endif /* __ardour_stretch_h__ */
|
||||
|
|
|
|||
|
|
@ -26,10 +26,11 @@ namespace ARDOUR {
|
|||
struct TimeFXRequest : public InterThreadInfo {
|
||||
TimeFXRequest()
|
||||
: time_fraction(0), pitch_fraction(0),
|
||||
quick_seek(false), antialias(false), opts(0) {}
|
||||
use_soundtouch(false), quick_seek(false), antialias(false), opts(0) {}
|
||||
float time_fraction;
|
||||
float pitch_fraction;
|
||||
/* SoundTouch */
|
||||
bool use_soundtouch;
|
||||
bool quick_seek;
|
||||
bool antialias;
|
||||
/* RubberBand */
|
||||
|
|
|
|||
|
|
@ -28,6 +28,7 @@
|
|||
#include "ardour/audiofilesource.h"
|
||||
#include "ardour/session.h"
|
||||
#include "ardour/audioregion.h"
|
||||
#include "ardour/progress.h"
|
||||
|
||||
#include "pbd/i18n.h"
|
||||
|
||||
|
|
@ -40,23 +41,6 @@ STStretch::STStretch (Session& s, TimeFXRequest& req)
|
|||
: Filter (s)
|
||||
, tsr (req)
|
||||
{
|
||||
float percentage;
|
||||
|
||||
/* the soundtouch code wants a *tempo* change percentage, which is
|
||||
of opposite sign to the length change.
|
||||
*/
|
||||
|
||||
percentage = -tsr.time_fraction;
|
||||
|
||||
st.setSampleRate (s.sample_rate());
|
||||
st.setChannels (1);
|
||||
st.setTempoChange (percentage);
|
||||
st.setPitchSemiTones (0);
|
||||
st.setRateChange (0);
|
||||
|
||||
st.setSetting(SETTING_USE_QUICKSEEK, tsr.quick_seek);
|
||||
st.setSetting(SETTING_USE_AA_FILTER, tsr.antialias);
|
||||
|
||||
}
|
||||
|
||||
STStretch::~STStretch ()
|
||||
|
|
@ -64,99 +48,232 @@ STStretch::~STStretch ()
|
|||
}
|
||||
|
||||
int
|
||||
STStretch::run (boost::shared_ptr<Region> a_region, Progress* progress)
|
||||
STStretch::run (boost::shared_ptr<Region> r, Progress* progress)
|
||||
{
|
||||
boost::shared_ptr<AudioRegion> region = boost::dynamic_pointer_cast<AudioRegion> (r);
|
||||
|
||||
if (!region) {
|
||||
error << "STStretch::run() passed a non-audio region! WTF?" << endmsg;
|
||||
return -1;
|
||||
}
|
||||
|
||||
SourceList nsrcs;
|
||||
samplecnt_t total_samples;
|
||||
samplecnt_t done;
|
||||
int ret = -1;
|
||||
const samplecnt_t bufsize = 16384;
|
||||
const samplecnt_t bufsize = 8192;
|
||||
gain_t* gain_buffer = 0;
|
||||
Sample *buffer = 0;
|
||||
Sample** buffers = 0;
|
||||
char suffix[32];
|
||||
string new_name;
|
||||
string::size_type at;
|
||||
|
||||
#ifndef NDEBUG
|
||||
cerr << "STStretch: source region: position = " << region->position ()
|
||||
<< ", start = " << region->start ()
|
||||
<< ", length = " << region->length ()
|
||||
<< ", ancestral_start = " << region->ancestral_start ()
|
||||
<< ", ancestral_length = " << region->ancestral_length ()
|
||||
<< ", stretch " << region->stretch ()
|
||||
<< ", shift " << region->shift () << endl;
|
||||
#endif
|
||||
|
||||
/*
|
||||
* We have two cases to consider:
|
||||
*
|
||||
* 1. The region has not been stretched before.
|
||||
*
|
||||
* In this case, we just want to read region->length() samples
|
||||
* from region->start().
|
||||
*
|
||||
* We will create a new region of region->length() *
|
||||
* tsr.time_fraction samples. The new region will have its
|
||||
* start set to 0 (because it has a new audio file that begins
|
||||
* at the start of the stretched area) and its ancestral_start
|
||||
* set to region->start() (so that we know where to begin
|
||||
* reading if we want to stretch it again).
|
||||
*
|
||||
* 2. The region has been stretched before.
|
||||
*
|
||||
* The region starts at region->start() samples into its
|
||||
* (possibly previously stretched) source file. But we don't
|
||||
* want to read from its source file; we want to read from the
|
||||
* file it was originally stretched from.
|
||||
*
|
||||
* The region's source begins at region->ancestral_start()
|
||||
* samples into its master source file. Thus, we need to start
|
||||
* reading at region->ancestral_start() + (region->start() /
|
||||
* region->stretch()) samples into the master source. This
|
||||
* value will also become the ancestral_start for the new
|
||||
* region.
|
||||
*
|
||||
* We cannot use region->ancestral_length() to establish how
|
||||
* many samples to read, because it won't be up to date if the
|
||||
* region has been trimmed since it was last stretched. We
|
||||
* must read region->length() / region->stretch() samples and
|
||||
* stretch them by tsr.time_fraction * region->stretch(), for
|
||||
* a new region of region->length() * tsr.time_fraction
|
||||
* samples.
|
||||
*
|
||||
* Case 1 is of course a special case of 2, where
|
||||
* region->ancestral_start() == 0 and region->stretch() == 1.
|
||||
*
|
||||
* When we ask to read from a region, we supply a position on
|
||||
* the global timeline. The read function calculates the
|
||||
* offset into the source as (position - region->position()) +
|
||||
* region->start(). This calculation is used regardless of
|
||||
* whether we are reading from a master or
|
||||
* previously-stretched region. In order to read from a point
|
||||
* n samples into the master source, we need to provide n -
|
||||
* region->start() + region->position() as our position
|
||||
* argument to master_read_at().
|
||||
*
|
||||
* Note that region->ancestral_length() is not used.
|
||||
*
|
||||
* I hope this is clear.
|
||||
*/
|
||||
|
||||
double stretch = region->stretch () * tsr.time_fraction;
|
||||
stretch = std::min(20.0, std::max(0.02, stretch));
|
||||
samplecnt_t read_start = region->ancestral_start () +
|
||||
samplecnt_t (region->start () / (double)region->stretch ());
|
||||
|
||||
samplecnt_t read_duration =
|
||||
samplecnt_t (region->length () / (double)region->stretch ());
|
||||
|
||||
uint32_t channels = region->n_channels ();
|
||||
|
||||
#ifndef NDEBUG
|
||||
cerr << "RBStretcher: input-len = " << read_duration
|
||||
<< ", rate = " << session.sample_rate ()
|
||||
<< ", channels = " << channels
|
||||
<< ", opts = " << tsr.opts
|
||||
<< ", stretch = " << stretch << endl;
|
||||
#endif
|
||||
|
||||
|
||||
soundtouch::SoundTouch st[channels];
|
||||
for (uint32_t i = 0; i < channels; ++i) {
|
||||
st[i].setSampleRate(session.sample_rate());
|
||||
st[i].setChannels(1);
|
||||
st[i].setTempo(1.0 / stretch);
|
||||
|
||||
st[i].setSetting(SETTING_USE_QUICKSEEK, tsr.quick_seek);
|
||||
st[i].setSetting(SETTING_USE_AA_FILTER, tsr.antialias);
|
||||
st[i].setSetting(SETTING_SEQUENCE_MS, 40);
|
||||
st[i].setSetting(SETTING_SEEKWINDOW_MS, 15);
|
||||
st[i].setSetting(SETTING_OVERLAP_MS, 8);
|
||||
}
|
||||
|
||||
progress->set_progress (0);
|
||||
tsr.done = false;
|
||||
|
||||
boost::shared_ptr<AudioRegion> region = boost::dynamic_pointer_cast<AudioRegion>(a_region);
|
||||
|
||||
total_samples = region->length() * region->n_channels();
|
||||
done = 0;
|
||||
|
||||
/* the name doesn't need to be super-precise, but allow for 2 fractional
|
||||
digits just to disambiguate close but not identical stretches.
|
||||
* digits just to disambiguate close but not identical FX
|
||||
*/
|
||||
|
||||
snprintf (suffix, sizeof (suffix), "@%d", (int) floor (tsr.time_fraction * 100.0f));
|
||||
snprintf (suffix, sizeof (suffix), "@%d", (int)floor (stretch * 100.0f));
|
||||
|
||||
/* create new sources */
|
||||
|
||||
samplepos_t pos = 0;
|
||||
|
||||
if (make_new_sources (region, nsrcs, suffix)) {
|
||||
goto out;
|
||||
}
|
||||
|
||||
gain_buffer = new gain_t[bufsize];
|
||||
buffer = new Sample[bufsize];
|
||||
buffers = new float*[channels];
|
||||
|
||||
// soundtouch throws runtime_error on error
|
||||
for (uint32_t i = 0; i < channels; ++i) {
|
||||
buffers[i] = new float[bufsize];
|
||||
}
|
||||
|
||||
/* we read from the master (original) sources for the region,
|
||||
* not the ones currently in use, in case it's already been
|
||||
* subject to timefx. */
|
||||
|
||||
try {
|
||||
for (uint32_t i = 0; i < nsrcs.size(); ++i) {
|
||||
/* start process */
|
||||
pos = 0;
|
||||
|
||||
boost::shared_ptr<AudioSource> asrc
|
||||
= boost::dynamic_pointer_cast<AudioSource>(nsrcs[i]);
|
||||
|
||||
samplepos_t pos = 0;
|
||||
while (pos < read_duration && !tsr.cancel) {
|
||||
samplecnt_t this_read = 0;
|
||||
|
||||
st.clear();
|
||||
for (uint32_t i = 0; i < channels; ++i) {
|
||||
samplepos_t this_time;
|
||||
this_time = min (bufsize, read_duration - pos);
|
||||
|
||||
while (!tsr.cancel && pos < region->length()) {
|
||||
samplecnt_t this_time;
|
||||
samplepos_t this_position;
|
||||
this_position = read_start + pos -
|
||||
region->start () + region->position ();
|
||||
|
||||
this_time = min (bufsize, region->length() - pos);
|
||||
this_read = region->master_read_at (buffers[i],
|
||||
buffers[i],
|
||||
gain_buffer,
|
||||
this_position,
|
||||
this_time,
|
||||
i);
|
||||
|
||||
/* read from the master (original) sources for the region,
|
||||
not the ones currently in use, in case it's already been
|
||||
subject to timefx.
|
||||
*/
|
||||
|
||||
if ((this_read = region->master_read_at (buffer, buffer, gain_buffer, pos + region->position(), this_time)) != this_time) {
|
||||
error << string_compose (_("tempoize: error reading data from %1"), asrc->name()) << endmsg;
|
||||
if (this_read != this_time) {
|
||||
error << string_compose (_("tempoize: error reading data from %1 at %2 (wanted %3, got %4)"),
|
||||
region->name (), pos + region->position (), this_time, this_read)
|
||||
<< endmsg;
|
||||
goto out;
|
||||
}
|
||||
|
||||
st[i].putSamples (buffers[i], this_read);
|
||||
}
|
||||
pos += this_read;
|
||||
done += this_read;
|
||||
progress->set_progress (0.25 + ((float)pos / read_duration) * 0.75);
|
||||
|
||||
progress->set_progress ((float) done / total_samples);
|
||||
for (uint32_t i = 0; i < channels; ++i) {
|
||||
samplecnt_t avail = 0;
|
||||
while ((avail = st[i].numSamples ()) > 0) {
|
||||
this_read = min (bufsize, avail);
|
||||
|
||||
st.putSamples (buffer, this_read);
|
||||
this_read = st[i].receiveSamples(buffers[i], this_read);
|
||||
boost::shared_ptr<AudioSource> asrc = boost::dynamic_pointer_cast<AudioSource> (nsrcs[i]);
|
||||
if (!asrc) {
|
||||
continue;
|
||||
}
|
||||
|
||||
while ((this_read = st.receiveSamples (buffer, bufsize)) > 0 && !tsr.cancel) {
|
||||
if (asrc->write (buffer, this_read) != this_read) {
|
||||
error << string_compose (_("error writing tempo-adjusted data to %1"), asrc->name()) << endmsg;
|
||||
if (asrc->write (buffers[i], this_read) != this_read) {
|
||||
error << string_compose (_("error writing tempo-adjusted data to %1"), nsrcs[i]->name ()) << endmsg;
|
||||
goto out;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (!tsr.cancel) {
|
||||
st.flush ();
|
||||
for (uint32_t i = 0; i < channels; ++i) {
|
||||
st[i].flush ();
|
||||
}
|
||||
}
|
||||
|
||||
while (!tsr.cancel && (this_read = st.receiveSamples (buffer, bufsize)) > 0) {
|
||||
if (asrc->write (buffer, this_read) != this_read) {
|
||||
error << string_compose (_("error writing tempo-adjusted data to %1"), asrc->name()) << endmsg;
|
||||
/* completing */
|
||||
for (uint32_t i = 0; i < channels; ++i) {
|
||||
samplecnt_t avail = 0;
|
||||
samplecnt_t this_read = 0;
|
||||
while ((avail = st[i].numSamples ()) > 0) {
|
||||
this_read = min (bufsize, avail);
|
||||
|
||||
this_read = st[i].receiveSamples(buffers[i], this_read);
|
||||
|
||||
boost::shared_ptr<AudioSource> asrc = boost::dynamic_pointer_cast<AudioSource> (nsrcs[i]);
|
||||
if (!asrc) {
|
||||
continue;
|
||||
}
|
||||
|
||||
if (asrc->write (buffers[i], this_read) != this_read) {
|
||||
error << string_compose (_("error writing tempo-adjusted data to %1"), nsrcs[i]->name ()) << endmsg;
|
||||
goto out;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
} catch (runtime_error& err) {
|
||||
error << _("timefx code failure. please notify ardour-developers.") << endmsg;
|
||||
error << string_compose (_("programming error: %1"), X_("timefx code failure")) << endmsg;
|
||||
error << err.what () << endmsg;
|
||||
goto out;
|
||||
}
|
||||
|
|
@ -164,7 +281,7 @@ STStretch::run (boost::shared_ptr<Region> a_region, Progress* progress)
|
|||
new_name = region->name ();
|
||||
at = new_name.find ('@');
|
||||
|
||||
// remove any existing stretch indicator
|
||||
/* remove any existing stretch indicator */
|
||||
|
||||
if (at != string::npos && at > 2) {
|
||||
new_name = new_name.substr (0, at - 1);
|
||||
|
|
@ -179,26 +296,36 @@ STStretch::run (boost::shared_ptr<Region> a_region, Progress* progress)
|
|||
/* now reset ancestral data for each new region */
|
||||
|
||||
for (vector<boost::shared_ptr<Region> >::iterator x = results.begin (); x != results.end (); ++x) {
|
||||
samplepos_t astart = (*x)->ancestral_start();
|
||||
samplepos_t alength = (*x)->ancestral_length();
|
||||
samplepos_t start;
|
||||
samplecnt_t length;
|
||||
(*x)->set_ancestral_data (read_start,
|
||||
read_duration,
|
||||
stretch,
|
||||
1.0);
|
||||
(*x)->set_master_sources (region->master_sources ());
|
||||
/* multiply the old (possibly previously stretched) region length by the extra
|
||||
* stretch this time around to get its new length. this is a non-music based edit atm.
|
||||
*/
|
||||
(*x)->set_length ((*x)->length () * tsr.time_fraction, 0);
|
||||
}
|
||||
|
||||
// note: tsr.fraction is a percentage of original length. 100 = no change,
|
||||
// 50 is half as long, 200 is twice as long, etc.
|
||||
/* stretch region gain envelope */
|
||||
/* XXX: assuming we've only processed one input region into one result here */
|
||||
|
||||
float stretch = (*x)->stretch() * (tsr.time_fraction/100.0);
|
||||
|
||||
start = (samplepos_t) floor (astart + ((astart - (*x)->start()) / stretch));
|
||||
length = (samplecnt_t) floor (alength / stretch);
|
||||
|
||||
(*x)->set_ancestral_data (start, length, stretch, (*x)->shift());
|
||||
if (ret == 0 && tsr.time_fraction != 1) {
|
||||
boost::shared_ptr<AudioRegion> result = boost::dynamic_pointer_cast<AudioRegion> (results.front ());
|
||||
assert (result);
|
||||
result->envelope ()->x_scale (tsr.time_fraction);
|
||||
}
|
||||
|
||||
out:
|
||||
|
||||
delete[] gain_buffer;
|
||||
delete [] buffer;
|
||||
|
||||
if (buffers) {
|
||||
for (uint32_t i = 0; i < channels; ++i) {
|
||||
delete[] buffers[i];
|
||||
}
|
||||
delete[] buffers;
|
||||
}
|
||||
|
||||
if (ret || tsr.cancel) {
|
||||
for (SourceList::iterator si = nsrcs.begin (); si != nsrcs.end (); ++si) {
|
||||
|
|
|
|||
|
|
@ -327,8 +327,8 @@ def configure(conf):
|
|||
if conf.is_defined ('HAVE_LV2_1_10_0'):
|
||||
conf.define ('LV2_EXTENDED', 1)
|
||||
|
||||
# autowaf.check_pkg(conf, 'soundtouch-1.0', uselib_store='SOUNDTOUCH',
|
||||
# mandatory=False)
|
||||
autowaf.check_pkg(conf, 'soundtouch', uselib_store='SOUNDTOUCH',
|
||||
atleast_version='1.8.0', mandatory=False)
|
||||
autowaf.check_pkg(conf, 'cppunit', uselib_store='CPPUNIT',
|
||||
atleast_version='1.12.0', mandatory=False)
|
||||
autowaf.check_pkg(conf, 'ogg', uselib_store='OGG', atleast_version='1.1.2')
|
||||
|
|
@ -428,8 +428,10 @@ def build(bld):
|
|||
'LIBARDOUR="' + bld.env['lwrcase_dirname'] + '"'
|
||||
]
|
||||
|
||||
if bld.is_defined('HAVE_SOUNDTOUCH'):
|
||||
obj.source += ['st_stretch.cc']
|
||||
#obj.source += ' st_stretch.cc st_pitch.cc '
|
||||
#obj.uselib += ' SOUNDTOUCH '
|
||||
obj.uselib += ['SOUNDTOUCH']
|
||||
#obj.add_objects = 'default/libs/surfaces/control_protocol/smpte_1.o'
|
||||
|
||||
if bld.is_defined('HAVE_LILV') :
|
||||
|
|
|
|||
|
|
@ -28,6 +28,7 @@
|
|||
|
||||
#ifdef COMPILER_MINGW
|
||||
#include <io.h> // For W_OK
|
||||
#include <windows.h>
|
||||
#endif
|
||||
|
||||
#include <glibmm/fileutils.h>
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue