update qm-dsp library

Robin Gareus 2016-10-06 00:16:44 +02:00
parent 2a27cc4758
commit f68d2e06bc
100 changed files with 58968 additions and 55091 deletions

libs/qm-dsp/dsp/segmentation/ClusterMeltSegmenter.cpp

@@ -51,7 +51,7 @@ void ClusterMeltSegmenter::initialise(int fs)
if (featureType == FEATURE_TYPE_CONSTQ ||
featureType == FEATURE_TYPE_CHROMA) {
// run internal processing at 11025 or thereabouts
int internalRate = 11025;
int decimationFactor = samplerate / internalRate;
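A quick sanity check of that integer division (a minimal sketch, not library code):

    int samplerate = 44100;                             // host rate; 48000 behaves similarly
    int internalRate = 11025;                           // target internal rate
    int decimationFactor = samplerate / internalRate;   // 44100 / 11025 = 4
    int effectiveRate = samplerate / decimationFactor;  // 11025 here; 12000 for a 48 kHz host, i.e. "thereabouts"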
@@ -77,11 +77,11 @@ void ClusterMeltSegmenter::initialise(int fs)
constq = new ConstantQ(config);
constq->sparsekernel();
ncoeff = constq->getK();
fft = new FFTReal(constq->getfftlength());
} else if (featureType == FEATURE_TYPE_MFCC) {
// run internal processing at 22050 or thereabouts
@@ -110,7 +110,7 @@ void ClusterMeltSegmenter::initialise(int fs)
}
}
ClusterMeltSegmenter::~ClusterMeltSegmenter()
{
delete window;
delete constq;
@@ -164,7 +164,7 @@ void ClusterMeltSegmenter::extractFeaturesConstQ(const double* samples, int nsam
vector<double> cq(ncoeff);
for (int i = 0; i < ncoeff; ++i) cq[i] = 0.0;
const double *psource = samples;
int pcount = nsamples;
@@ -174,9 +174,9 @@ void ClusterMeltSegmenter::extractFeaturesConstQ(const double* samples, int nsam
decimator->process(samples, decout);
psource = decout;
}
int origin = 0;
// std::cerr << "nsamples = " << nsamples << ", pcount = " << pcount << std::endl;
int frames = 0;
@@ -208,11 +208,11 @@ void ClusterMeltSegmenter::extractFeaturesConstQ(const double* samples, int nsam
}
window->cut(frame);
- fft->process(false, frame, real, imag);
+ fft->forward(frame, real, imag);
constq->process(real, imag, cqre, cqim);
for (int i = 0; i < ncoeff; ++i) {
cq[i] += sqrt(cqre[i] * cqre[i] + cqim[i] * cqim[i]);
}
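The one functional change in this hunk is the FFT call marked above: the boolean direction flag of the old process() API gives way to an explicit forward() method taking the same windowed input and real/imaginary output buffers. Side by side, as read from the diff:

    // old API: direction selected by a flag (false = forward transform)
    fft->process(false, frame, real, imag);

    // new API: explicit forward transform, same buffers
    fft->forward(frame, real, imag);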
@@ -255,7 +255,7 @@ void ClusterMeltSegmenter::extractFeaturesMFCC(const double* samples, int nsampl
vector<double> cc(ncoeff);
for (int i = 0; i < ncoeff; ++i) cc[i] = 0.0;
const double *psource = samples;
int pcount = nsamples;
@@ -287,7 +287,7 @@ void ClusterMeltSegmenter::extractFeaturesMFCC(const double* samples, int nsampl
}
mfcc->process(frame, ccout);
for (int i = 0; i < ncoeff; ++i) {
cc[i] += ccout[i];
}
@@ -330,44 +330,44 @@ void ClusterMeltSegmenter::segment()
decimator = 0;
if (features.size() < histogramLength) return;
/*
std::cerr << "ClusterMeltSegmenter::segment: have " << features.size()
<< " features with " << features[0].size() << " coefficients (ncoeff = " << ncoeff << ", ncomponents = " << ncomponents << ")" << std::endl;
*/
// copy the features to a native array and use the existing C segmenter...
double** arrFeatures = new double*[features.size()];
for (int i = 0; i < features.size(); i++)
{
if (featureType == FEATURE_TYPE_UNKNOWN) {
arrFeatures[i] = new double[features[0].size()];
for (int j = 0; j < features[0].size(); j++)
arrFeatures[i][j] = features[i][j];
} else {
arrFeatures[i] = new double[ncoeff+1]; // allow space for the normalised envelope
for (int j = 0; j < ncoeff; j++)
arrFeatures[i][j] = features[i][j];
}
}
q = new int[features.size()];
if (featureType == FEATURE_TYPE_UNKNOWN ||
featureType == FEATURE_TYPE_MFCC)
cluster_segment(q, arrFeatures, features.size(), features[0].size(), nHMMStates, histogramLength,
nclusters, neighbourhoodLimit);
else
constq_segment(q, arrFeatures, features.size(), nbins, ncoeff, featureType,
nHMMStates, histogramLength, nclusters, neighbourhoodLimit);
// convert the cluster assignment sequence to a segmentation
makeSegmentation(q, features.size());
// de-allocate arrays
delete [] q;
for (int i = 0; i < features.size(); i++)
delete [] arrFeatures[i];
delete [] arrFeatures;
// clear the features
clear();
}
@@ -377,11 +377,11 @@ void ClusterMeltSegmenter::makeSegmentation(int* q, int len)
segmentation.segments.clear();
segmentation.nsegtypes = nclusters;
segmentation.samplerate = samplerate;
Segment segment;
segment.start = 0;
segment.type = q[0];
for (int i = 1; i < len; i++)
{
if (q[i] != q[i-1])
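The loop above run-length encodes the decoded state sequence q: a new Segment opens wherever the cluster assignment changes. A self-contained sketch of that conversion, using a hypothetical Run record in place of the library's Segment (the library method presumably also maps frame indices to sample times; that code falls outside this hunk):

    #include <vector>

    struct Run { int start; int end; int type; };   // hypothetical stand-in for Segment

    std::vector<Run> runLengthEncode(const int* q, int len)
    {
        std::vector<Run> runs;
        if (len <= 0) return runs;
        Run cur = { 0, 0, q[0] };
        for (int i = 1; i < len; i++) {
            if (q[i] != q[i-1]) {                   // boundary: assignment changed
                cur.end = i;
                runs.push_back(cur);
                cur.start = i;
                cur.type = q[i];
            }
        }
        cur.end = len;                              // close the final run
        runs.push_back(cur);
        return runs;
    }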

libs/qm-dsp/dsp/segmentation/ClusterMeltSegmenter.h

@@ -31,12 +31,12 @@ class ClusterMeltSegmenterParams
// defaults are sensible for 11025Hz with 0.2 second hopsize
{
public:
ClusterMeltSegmenterParams() :
featureType(FEATURE_TYPE_CONSTQ),
hopSize(0.2),
windowSize(0.6),
fmin(62),
fmax(16000),
nbins(8),
ncomponents(20),
nHMMStates(40),
@@ -72,34 +72,34 @@ public:
protected:
void makeSegmentation(int* q, int len);
void extractFeaturesConstQ(const double *, int);
void extractFeaturesMFCC(const double *, int);
Window<double> *window;
FFTReal *fft;
ConstantQ* constq;
MFCC* mfcc;
model_t* model; // the HMM
int* q; // the decoded HMM state sequence
vector<vector<double> > histograms;
feature_types featureType;
double hopSize; // in seconds
double windowSize; // in seconds
// constant-Q parameters
int fmin;
int fmax;
int nbins;
int ncoeff;
// PCA parameters
int ncomponents;
// HMM parameters
int nHMMStates;
// clustering parameters
int nclusters;
int histogramLength;

libs/qm-dsp/dsp/segmentation/Segmenter.cpp

@@ -19,13 +19,13 @@
ostream& operator<<(ostream& os, const Segmentation& s)
{
os << "structure_name : begin_time end_time\n";
for (int i = 0; i < s.segments.size(); i++)
{
Segment seg = s.segments[i];
os << std::fixed << seg.type << ':' << '\t' << std::setprecision(6) << seg.start / static_cast<double>(s.samplerate)
<< '\t' << std::setprecision(6) << seg.end / static_cast<double>(s.samplerate) << "\n";
}
return os;
}

libs/qm-dsp/dsp/segmentation/Segmenter.h

@@ -35,7 +35,7 @@ class Segmentation
public:
int nsegtypes; // number of segment types, so possible types are {0,1,...,nsegtypes-1}
int samplerate;
vector<Segment> segments;
};
ostream& operator<<(ostream& os, const Segmentation& s);
@@ -52,7 +52,7 @@ public:
virtual void segment() = 0; // call once all the features have been extracted
virtual void segment(int m) = 0; // specify desired number of segment-types
virtual void clear() { features.clear(); }
const Segmentation& getSegmentation() const { return segmentation; }
protected:
vector<vector<double> > features;
Segmentation segmentation;
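Read together, these declarations imply the intended call sequence: construct a concrete segmenter, initialise it with the sample rate, feed it audio, call segment(), then read the result. A hedged usage sketch; the params-taking constructor and the extractFeatures() call are assumptions based on the rest of the qm-dsp headers, not shown in this diff:

    ClusterMeltSegmenterParams params;        // defaults: const-Q features, 0.2 s hop
    params.featureType = FEATURE_TYPE_MFCC;   // or FEATURE_TYPE_CONSTQ / _CHROMA

    ClusterMeltSegmenter seg(params);         // constructor signature assumed
    seg.initialise(44100);                    // matches initialise(int fs) above
    // assume 'samples'/'nsamples' hold mono audio at that rate
    seg.extractFeatures(samples, nsamples);   // Segmenter interface method (assumed)
    seg.segment();                            // HMM decode + histogram clustering
    const Segmentation& result = seg.getSegmentation();
    std::cout << result;                      // prints via the operator<< shown earlier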

libs/qm-dsp/dsp/segmentation/cluster_melt.c

@@ -25,7 +25,7 @@ double kldist(double* a, double* b, int n) {
because a, b represent probability distributions */
double q, d;
int i;
d = 0;
for (i = 0; i < n; i++)
{
@@ -38,8 +38,8 @@ double kldist(double* a, double* b, int n) {
d += b[i] * log(b[i] / q);
}
}
return d;
}
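kldist computes a guarded Kullback-Leibler divergence between histograms: terms accumulate only where b_i is non-zero, and the fallback for q (partly outside this hunk) makes terms with a zero reference bin vanish. Reconstructed from the visible code, in LaTeX:

    d(a,b) = \sum_{i \,:\, b_i \neq 0} b_i \log\frac{b_i}{q_i},
    \qquad
    q_i = \begin{cases} a_i & a_i \neq 0 \\ b_i & a_i = 0 \end{cases}

which reduces to the standard D_{\mathrm{KL}}(b \,\|\, a) when every a_i > 0.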
void cluster_melt(double *h, int m, int n, double *Bsched, int t, int k, int l, int *c) {
double lambda, sum, beta, logsumexp, maxlp;
@@ -48,9 +48,9 @@ void cluster_melt(double *h, int m, int n, double *Bsched, int t, int k, int l,
int** nc; /* neighbour counts for each histogram */
double** lp; /* soft assignment probs for each histogram */
int* oldc; /* previous hard assignments (to check convergence) */
/* NB h is passed as a 1d row major array */
/* parameter values */
lambda = DEFAULT_LAMBDA;
if (l > 0)
@@ -60,22 +60,22 @@ void cluster_melt(double *h, int m, int n, double *Bsched, int t, int k, int l,
B = 2 * limit + 1;
maxiter0 = 20; /* number of iterations at initial temperature */
maxiter1 = 5; /* number of iterations at subsequent temperatures */
/* allocate memory */
cl = (double**) malloc(k*sizeof(double*));
for (i= 0; i < k; i++)
cl[i] = (double*) malloc(m*sizeof(double));
nc = (int**) malloc(n*sizeof(int*));
for (i= 0; i < n; i++)
nc[i] = (int*) malloc(k*sizeof(int));
lp = (double**) malloc(n*sizeof(double*));
for (i= 0; i < n; i++)
lp[i] = (double*) malloc(k*sizeof(double));
oldc = (int*) malloc(n * sizeof(int));
/* initialise */
for (i = 0; i < k; i++)
{
@@ -90,40 +90,40 @@ void cluster_melt(double *h, int m, int n, double *Bsched, int t, int k, int l,
{
cl[i][j] /= sum; /* normalise */
}
}
//print_array(cl, k, m);
for (i = 0; i < n; i++)
c[i] = 1; /* initially assign all histograms to cluster 1 */
for (a = 0; a < t; a++)
{
beta = Bsched[a];
if (a == 0)
maxiter = maxiter0;
else
maxiter = maxiter1;
for (it = 0; it < maxiter; it++)
{
//if (it == maxiter - 1)
// mexPrintf("hasn't converged after %d iterations\n", maxiter);
for (i = 0; i < n; i++)
{
/* save current hard assignments */
oldc[i] = c[i];
/* calculate soft assignment logprobs for each cluster */
sum = 0;
for (j = 0; j < k; j++)
{
lp[i][j] = -beta * kldist(cl[j], &h[i*m], m);
/* update matching neighbour counts for this histogram, based on current hard assignments */
/* old version:
nc[i][j] = 0;
if (i >= limit && i <= n - 1 - limit)
{
for (b = i - limit; b <= i + limit; b++)
@@ -144,14 +144,14 @@ void cluster_melt(double *h, int m, int n, double *Bsched, int t, int k, int l,
for (b = b0; b <= b1; b++)
if (c[b] == j+1)
nc[i][j]--;
sum += exp(lp[i][j]);
}
/* normalise responsibilities and add duration logprior */
logsumexp = log(sum);
for (j = 0; j < k; j++)
lp[i][j] -= logsumexp + lambda * nc[i][j];
}
//print_array(lp, n, k);
/*
@@ -160,10 +160,10 @@ void cluster_melt(double *h, int m, int n, double *Bsched, int t, int k, int l,
for (j = 0; j < k; j++)
mexPrintf("%d ", nc[i][j]);
mexPrintf("\n");
}
*/
/* update the assignments now that we know the duration priors
based on the current assignments */
for (i = 0; i < n; i++)
@@ -177,14 +177,14 @@ void cluster_melt(double *h, int m, int n, double *Bsched, int t, int k, int l,
c[i] = j+1;
}
}
/* break if assignments haven't changed */
i = 0;
while (i < n && oldc[i] == c[i])
i++;
if (i == n)
break;
/* update reference histograms now we know new responsibilities */
for (j = 0; j < k; j++)
{
@@ -194,21 +194,21 @@ void cluster_melt(double *h, int m, int n, double *Bsched, int t, int k, int l,
for (i = 0; i < n; i++)
{
cl[j][b] += exp(lp[i][j]) * h[i*m+b];
}
}
sum = 0;
for (i = 0; i < n; i++)
sum += exp(lp[i][j]);
for (b = 0; b < m; b++)
cl[j][b] /= sum; /* normalise */
}
//print_array(cl, k, m);
//mexPrintf("\n\n");
}
}
/* free memory */
for (i = 0; i < k; i++)
free(cl[i]);
@@ -219,7 +219,7 @@ void cluster_melt(double *h, int m, int n, double *Bsched, int t, int k, int l,
for (i = 0; i < n; i++)
free(lp[i]);
free(lp);
free(oldc);
}

libs/qm-dsp/dsp/segmentation/cluster_segmenter.c

@@ -25,7 +25,7 @@ void cq2chroma(double** cq, int nframes, int ncoeff, int bins, double** chroma)
int t, b, oct, ix;
//double maxchroma; /* max chroma value at each time, for normalisation */
//double sum; /* for normalisation */
for (t = 0; t < nframes; t++)
{
for (b = 0; b < bins; b++)
@@ -50,7 +50,7 @@ void cq2chroma(double** cq, int nframes, int ncoeff, int bins, double** chroma)
maxchroma = chroma[t][b];
if (maxchroma > 0)
for (b = 0; b < bins; b++)
chroma[t][b] /= maxchroma;
*/
}
}
@@ -62,13 +62,13 @@ void mpeg7_constq(double** features, int nframes, int ncoeff)
double ss;
double env;
double maxenv = 0;
/* convert const-Q features to dB scale */
for (i = 0; i < nframes; i++)
for (j = 0; j < ncoeff; j++)
features[i][j] = 10.0 * log10(features[i][j]+DBL_EPSILON);
/* normalise each feature vector and add the norm as an extra feature dimension */
for (i = 0; i < nframes; i++)
{
ss = 0;
@@ -80,10 +80,10 @@ void mpeg7_constq(double** features, int nframes, int ncoeff)
features[i][ncoeff] = env;
if (env > maxenv)
maxenv = env;
}
/* normalise the envelopes */
for (i = 0; i < nframes; i++)
features[i][ncoeff] /= maxenv;
}
/* return histograms h[nx*m] of data x[nx] into m bins using a sliding window of length h_len (MUST BE ODD) */
@@ -94,7 +94,7 @@ void create_histograms(int* x, int nx, int m, int hlen, double* h)
int i, j, t;
double norm;
for (i = 0; i < nx*m; i++)
h[i] = 0;
for (i = hlen/2; i < nx-hlen/2; i++)
@@ -109,7 +109,7 @@ void create_histograms(int* x, int nx, int m, int hlen, double* h)
for (j = 0; j < m; j++)
h[i*m+j] /= norm;
}
/* duplicate histograms at beginning and end to create one histogram for each data value supplied */
for (i = 0; i < hlen/2; i++)
for (j = 0; j < m; j++)
@@ -120,11 +120,11 @@ void create_histograms(int* x, int nx, int m, int hlen, double* h)
}
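Per the banner comment, create_histograms turns a state sequence x[nx] into one normalised m-bin histogram per frame, computed over a centred sliding window of odd length hlen and padded at both ends by duplication. A minimal usage sketch matching the call cluster_segment makes below (all lengths illustrative):

    #include <stdlib.h>

    int nframes = 1000;                 /* illustrative frame count */
    int nstates = 40;                   /* m: one bin per HMM state */
    int hlen = 15;                      /* window length; MUST BE ODD (illustrative) */

    int* q = (int*) malloc(nframes * sizeof(int));                    /* decoded states */
    double* h = (double*) malloc(nframes * nstates * sizeof(double)); /* row-major nx*m */
    /* ... fill q, e.g. via viterbi_decode() ... */
    create_histograms(q, nframes, nstates, hlen, h);
    /* h[i*nstates + j]: windowed, normalised count of state j around frame i */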
/* segment using HMM and then histogram clustering */
void cluster_segment(int* q, double** features, int frames_read, int feature_length, int nHMM_states,
int histogram_length, int nclusters, int neighbour_limit)
{
int i, j;
/*****************************/
if (0) {
/* try just using the predominant bin number as a 'decoded state' */
@@ -137,60 +137,60 @@ void cluster_segment(int* q, double** features, int frames_read, int feature_len
maxval = 0;
for (j = 0; j < feature_length; j++)
{
if (features[i][j] > maxval)
{
maxval = features[i][j];
maxbin = j;
}
}
if (maxval > chroma_thresh)
q[i] = maxbin;
else
q[i] = feature_length;
}
}
if (1) {
/*****************************/
/* scale all the features to 'balance covariances' during HMM training */
double scale = 10;
for (i = 0; i < frames_read; i++)
for (j = 0; j < feature_length; j++)
features[i][j] *= scale;
/* train an HMM on the features */
/* create a model */
model_t* model = hmm_init(features, frames_read, feature_length, nHMM_states);
/* train the model */
hmm_train(features, frames_read, model);
/*
printf("\n\nafter training:\n");
hmm_print(model);
*/
/* decode the hidden state sequence */
viterbi_decode(features, frames_read, model, q);
hmm_close(model);
/*****************************/
}
/*****************************/
/*
fprintf(stderr, "HMM state sequence:\n");
for (i = 0; i < frames_read; i++)
fprintf(stderr, "%d ", q[i]);
fprintf(stderr, "\n\n");
*/
/* create histograms of states */
double* h = (double*) malloc(frames_read*nHMM_states*sizeof(double)); /* vector in row major order */
create_histograms(q, frames_read, nHMM_states, histogram_length, h);
/* cluster the histograms */
int nbsched = 20; /* length of inverse temperature schedule */
double* bsched = (double*) malloc(nbsched*sizeof(double)); /* inverse temperature schedule */
@@ -200,39 +200,39 @@ void cluster_segment(int* q, double** features, int frames_read, int feature_len
for (i = 1; i < nbsched; i++)
bsched[i] = alpha * bsched[i-1];
cluster_melt(h, nHMM_states, frames_read, bsched, nbsched, nclusters, neighbour_limit, q);
/* now q holds a sequence of cluster assignments */
free(h);
free(bsched);
}
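The inverse-temperature schedule built just above is geometric: each entry scales the previous one by a constant alpha (the starting value and alpha are set on lines elided from this hunk). A standalone sketch with placeholder constants:

    int nbsched = 20;                     /* schedule length, as above */
    double* bsched = (double*) malloc(nbsched * sizeof(double));
    double b0 = 1.0, alpha = 0.9;         /* placeholders; the real constants are not shown in this diff */

    bsched[0] = b0;
    for (int i = 1; i < nbsched; i++)
        bsched[i] = alpha * bsched[i-1];  /* geometric ramp handed to cluster_melt() */
    free(bsched);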
/* segment constant-Q or chroma features */
void constq_segment(int* q, double** features, int frames_read, int bins, int ncoeff, int feature_type,
int nHMM_states, int histogram_length, int nclusters, int neighbour_limit)
{
int feature_length;
double** chroma;
int i;
if (feature_type == FEATURE_TYPE_CONSTQ)
{
/* fprintf(stderr, "Converting to dB and normalising...\n");
*/
mpeg7_constq(features, frames_read, ncoeff);
/*
fprintf(stderr, "Running PCA...\n");
*/
/* do PCA on the features (but not the envelope) */
int ncomponents = 20;
pca_project(features, frames_read, ncoeff, ncomponents);
/* copy the envelope so that it immediately follows the chosen components */
for (i = 0; i < frames_read; i++)
features[i][ncomponents] = features[i][ncoeff];
feature_length = ncomponents + 1;
/**************************************
//TEST
// feature file name
@@ -241,7 +241,7 @@ void constq_segment(int* q, double** features, int frames_read, int bins, int nc
strcpy(file_name, dir);
strcat(file_name, trackname);
strcat(file_name, "_features_c20r8h0.2f0.6.mat");
// get the features from Matlab from mat-file
int frames_in_file;
readmatarray_size(file_name, 2, &frames_in_file, &feature_length);
@@ -254,27 +254,27 @@ void constq_segment(int* q, double** features, int frames_read, int bins, int nc
features[frames_read-missing_frames][i] = features[frames_read-missing_frames-1][i];
--missing_frames;
}
free(file_name);
******************************************/
cluster_segment(q, features, frames_read, feature_length, nHMM_states, histogram_length, nclusters, neighbour_limit);
}
if (feature_type == FEATURE_TYPE_CHROMA)
{
/*
fprintf(stderr, "Converting to chroma features...\n");
*/
/* convert constant-Q to normalised chroma features */
chroma = (double**) malloc(frames_read*sizeof(double*));
for (i = 0; i < frames_read; i++)
chroma[i] = (double*) malloc(bins*sizeof(double));
cq2chroma(features, frames_read, ncoeff, bins, chroma);
feature_length = bins;
cluster_segment(q, chroma, frames_read, feature_length, nHMM_states, histogram_length, nclusters, neighbour_limit);
for (i = 0; i < frames_read; i++)
free(chroma[i]);
free(chroma);

libs/qm-dsp/dsp/segmentation/cluster_segmenter.h

@@ -38,10 +38,10 @@ void cq2chroma(double** cq, int nframes, int ncoeff, int bins, double** chroma);
void create_histograms(int* x, int nx, int m, int hlen, double* h);
void cluster_segment(int* q, double** features, int frames_read, int feature_length, int nHMM_states,
int histogram_length, int nclusters, int neighbour_limit);
void constq_segment(int* q, double** features, int frames_read, int bins, int ncoeff, int feature_type,
int nHMM_states, int histogram_length, int nclusters, int neighbour_limit);
#ifdef __cplusplus

libs/qm-dsp/dsp/segmentation/segment.h

@@ -34,10 +34,10 @@ typedef struct segmentation_t
segment_t* segments;
} segmentation_t;
typedef enum
{
FEATURE_TYPE_UNKNOWN = 0,
FEATURE_TYPE_CONSTQ = 1,
FEATURE_TYPE_CHROMA = 2,
FEATURE_TYPE_MFCC = 3
} feature_types;
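This enum is what routes features through the two entry points declared above: CONSTQ runs through mpeg7_constq() and PCA before clustering, CHROMA is first folded into chroma bins by cq2chroma(), and MFCC or UNKNOWN features go to cluster_segment() unchanged. A compact sketch of that routing, mirroring ClusterMeltSegmenter::segment() and constq_segment() as shown earlier (illustrative only):

    void route(feature_types ftype)
    {
        switch (ftype) {
        case FEATURE_TYPE_CONSTQ:
            /* mpeg7_constq() + pca_project(), then cluster_segment() */
            break;
        case FEATURE_TYPE_CHROMA:
            /* cq2chroma() into 'bins' chroma bins, then cluster_segment() */
            break;
        case FEATURE_TYPE_MFCC:
        case FEATURE_TYPE_UNKNOWN:
            /* features handed to cluster_segment() as-is */
            break;
        }
    }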