* bug in org-store-link
@ 2008-02-26 20:40 Scott Otterson
2008-02-27 14:55 ` Carsten Dominik
0 siblings, 1 reply; 7+ messages in thread
From: Scott Otterson @ 2008-02-26 20:40 UTC (permalink / raw)
To: emacs-orgmode
[-- Attachment #1: Type: text/plain, Size: 600 bytes --]
Small bug in org store link. To reproduce, put the cursor on line 1007
and run org-store-link. Then use the result to create a hyperlink in an
org file, which for me looks like:
[[file:~/lib/c/pkgs/quicknet/qnstrn.cc::ftr1_window_offset%20ftr1_window_len][call]]
Then click on that hyperlink. I get sent to line 899 instead of line
1007.
It looks like the reason is that org tosses out puncutation (+>,). I've
found that, when linking to source code, punctuation is a big deal, so,
if possible, it would be nice if org mode was made sensitive to it.
Keep up the good work,
Scott
[-- Attachment #2: qnstrn.cc --]
[-- Type: text/x-c++src, Size: 47989 bytes --]
#ifndef NO_RCSID
const char* qnstrn_rcsid =
"$Header: /homes/scotto/lib/cvsroot/lib/c/pkgs/quicknet/qnstrn.cc,v 1.5 2007/01/24 00:07:46 scotto Exp $";
#endif
#include <QN_config.h>
#include <assert.h>
#include <float.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <time.h>
#ifdef QN_HAVE_LIMITS_H
#include <limits.h>
#endif
#ifndef EXIT_SUCCESS
#define EXIT_SUCCESS (0)
#define EXIT_FAILURE (1)
#endif
#include <sys/types.h>
#ifdef QN_HAVE_SYS_TIME_H
#include <sys/time.h>
#endif
#ifdef QN_HAVE_SYS_PARAM_H
#include <sys/param.h>
#endif
#include <unistd.h>
#if !QN_HAVE_DECL_SRAND48
extern "C" {
void srand48(long);
}
#endif
#ifdef QN_HAVE_SET_NEW_HANDLER
extern "C" {
typedef void (*new_handler)(void);
new_handler set_new_handler (new_handler);
}
#endif
#ifndef FILENAME_MAX
#define FILENAME_MAX (MAXPATHLEN)
#endif
#include "QuickNet.h"
static struct {
char* ftr1_file;
char* ftr1_format;
int ftr1_width;
char* ftr1_conf_file;
char* ftr2_file;
char* ftr2_format;
int ftr2_width;
char* unary_file;
char* hardtarget_file;
char* hardtarget_format;
char* softtarget_file;
char* softtarget_format;
int softtarget_width;
char* ftr1_norm_file;
char* ftr2_norm_file;
int ftr1_ftr_start;
int ftr2_ftr_start;
int ftr1_ftr_count;
int ftr2_ftr_count;
int hardtarget_lastlab_reject;
int window_extent;
int ftr1_window_offset;
int ftr2_window_offset;
int unary_window_offset;
int hardtarget_window_offset;
int softtarget_window_offset;
int ftr1_window_len;
int ftr2_window_len;
int ftr1_delta_order;
int ftr1_delta_win;
char* ftr1_norm_mode_str;
int ftr1_norm_mode;
double ftr1_norm_am;
double ftr1_norm_av;
int ftr2_delta_order;
int ftr2_delta_win;
char* ftr2_norm_mode_str;
int ftr2_norm_mode;
double ftr2_norm_am;
double ftr2_norm_av;
long train_cache_frames;
int train_cache_seed;
long train_sent_start;
long train_sent_count;
char* train_sent_range;
long cv_sent_start;
long cv_sent_count;
char* cv_sent_range;
QN_Arg_ListFloat init_random_bias_min;
QN_Arg_ListFloat init_random_bias_max;
QN_Arg_ListFloat init_random_weight_min;
QN_Arg_ListFloat init_random_weight_max;
int init_random_seed;
char* init_weight_file;
char* log_weight_file;
char* out_weight_file;
char* learnrate_schedule;
QN_Arg_ListFloat learnrate_vals;
long learnrate_epochs;
float learnrate_scale;
int unary_size;
int mlp3_input_size;
int mlp3_hidden_size;
int mlp3_output_size;
char* mlp3_output_type;
int mlp3_fx; // NO LONGER USED
int mlp3_weight_bits; // NO LONGER USED
int mlp3_in2hid_exp; // NO LONGER USED
int mlp3_hid2out_exp; // NO LONGER USED
int mlp3_bunch_size;
int mlp3_blas;
int mlp3_pp;
int threads;
int slaves; // NO LONGER USED
char *cpu; // NO LONGER USED
char* log_file; // Stream for storing status messages.
int verbose;
int debug; // Debug level.
} config;
static void
set_defaults(void)
{
static float default_learnrate[1] = { 0.008 };
static float default_bias_min[1] = { -4.1 };
static float default_bias_max[1] = { -3.9 };
static float default_weight_min[1] = { -0.1 };
static float default_weight_max[1] = { 0.1 };
config.ftr1_file = "";
config.ftr1_format = "pfile";
config.ftr1_width = 0;
config.ftr1_conf_file = "";
config.ftr2_file = "";
config.ftr2_format = "pfile";
config.ftr2_width = 0;
config.unary_file = "";
config.hardtarget_file = "";
config.hardtarget_format = "";
config.softtarget_file = "";
config.softtarget_format = "pfile";
config.softtarget_width = 0;
config.ftr1_norm_file = "";
config.ftr2_norm_file = "";
config.ftr1_ftr_start = 0;
config.ftr2_ftr_start = 0;
config.ftr1_ftr_count = 0;
config.ftr2_ftr_count = 0;
config.hardtarget_lastlab_reject = 0;
config.window_extent = 9;
config.ftr1_window_offset = 0;
config.ftr2_window_offset = 4;
config.unary_window_offset = 3;
config.hardtarget_window_offset = 0;
config.softtarget_window_offset = 0;
config.ftr1_window_len = 9;
config.ftr2_window_len = 0;
config.ftr1_delta_order = 0;
config.ftr1_delta_win = 9;
config.ftr1_norm_mode_str = NULL;
config.ftr1_norm_mode = QN_NORM_FILE;
config.ftr1_norm_am = QN_DFLT_NORM_AM;
config.ftr1_norm_av = QN_DFLT_NORM_AV;
config.ftr2_delta_order = 0;
config.ftr2_delta_win = 9;
config.ftr2_norm_mode_str = NULL;
config.ftr2_norm_mode = QN_NORM_FILE;
config.ftr2_norm_am = QN_DFLT_NORM_AM;
config.ftr2_norm_av = QN_DFLT_NORM_AV;
config.train_cache_frames = 10000;
config.train_cache_seed = 0;
config.train_sent_start = 0;
config.train_sent_count = INT_MAX;
config.train_sent_range = 0;
config.cv_sent_start = 0;
config.cv_sent_count = INT_MAX;
config.cv_sent_range = 0;
config.init_random_bias_min.count = 1;
config.init_random_bias_min.vals = &default_bias_min[0];
config.init_random_bias_max.count = 1;
config.init_random_bias_max.vals = &default_bias_max[0];
config.init_random_weight_min.count = 1;
config.init_random_weight_min.vals = &default_weight_min[0];
config.init_random_weight_max.count = 1;
config.init_random_weight_max.vals = &default_weight_max[0];
config.init_random_seed = 0;
config.init_weight_file = "";
config.log_weight_file = "log%p.weights";
config.out_weight_file = "out.weights";
config.learnrate_schedule = "newbob";
config.learnrate_vals.count = 1;
config.learnrate_vals.vals = &default_learnrate[0];
config.learnrate_epochs = 9999;
config.learnrate_scale = 0.5;
config.unary_size = 0;
config.mlp3_input_size = 153;
config.mlp3_hidden_size = 200;
config.mlp3_output_size = 56;
config.mlp3_output_type = "softmax";
config.mlp3_fx = 0;
config.mlp3_weight_bits = 32;
config.mlp3_in2hid_exp = 2;
config.mlp3_hid2out_exp = 2;
config.mlp3_bunch_size = 16;
#ifdef QN_HAVE_LIBBLAS
config.mlp3_blas = 1;
#else
config.mlp3_blas = 0;
#endif
config.mlp3_pp = 1;
config.threads = 1;
config.slaves = 0;
config.cpu = "host";
config.log_file = "-";
config.verbose = 0;
config.debug = 0;
}
QN_ArgEntry argtab[] =
{
{ NULL, "QuickNet MLP training program version " QN_VERSION, QN_ARG_DESC },
{ "ftr1_file", "Input feature file", QN_ARG_STR,
&(config.ftr1_file), QN_ARG_REQ },
{ "ftr1_format", "Main feature file format [pfile,pre,lna,onlftr,srifile,srilist]", QN_ARG_STR,
&(config.ftr1_format) },
{ "ftr1_width", "Main feature file feature columns", QN_ARG_INT,
&(config.ftr1_width) },
{ "ftr1_conf_file", "Confidences for ftr1. Format and number of frames matches ftr1. If confidence dimension is 1, then the weight will be applied across all elements in a feature frame; otherwise, the dimension must match ft1. ftr2 confs not implemented",
QN_ARG_STR, &(config.ftr1_conf_file) },
{ "ftr2_file", "Second input feature file", QN_ARG_STR,
&(config.ftr2_file) },
{ "ftr2_format","Secondary feature file format [pfile,pre,lna,onlftr,srifile,srilist]", QN_ARG_STR,
&(config.ftr2_format) },
{ "ftr2_width", "Secondary feature file feature columns", QN_ARG_INT,
&(config.ftr2_width) },
{ "unary_file", "Auxilliary unary file", QN_ARG_STR,
&(config.unary_file) },
{ "hardtarget_file", "Target label file", QN_ARG_STR,
&(config.hardtarget_file) },
{ "hardtarget_format", "Target label file format [pfile,pre,ilab]", QN_ARG_STR,
&(config.hardtarget_format) },
{ "softtarget_file", "Target feature file", QN_ARG_STR,
&(config.softtarget_file) },
{ "softtarget_format", "Target feature file format [pfile,pre,lna,onlftr]", QN_ARG_STR,
&(config.softtarget_format) },
{ "softtarget_width", "Target feature file feature columns", QN_ARG_INT,
&(config.softtarget_width) },
{ "ftr1_norm_file", "Normalization parameters for ftr1_file", QN_ARG_STR,
&(config.ftr1_norm_file) },
{ "ftr2_norm_file", "Normalization parameters for ftr2_file", QN_ARG_STR,
&(config.ftr2_norm_file) },
{ "ftr1_ftr_start", "First feature used from ftr1_file",
QN_ARG_INT, &(config.ftr1_ftr_start) },
{ "ftr2_ftr_start", "First feature used from ftr2_file",
QN_ARG_INT, &(config.ftr2_ftr_start) },
{ "ftr1_ftr_count", "Number of features used from ftr1_file",
QN_ARG_INT, &(config.ftr1_ftr_count) },
{ "ftr2_ftr_count", "Number of features used from ftr2_file",
QN_ARG_INT, &(config.ftr2_ftr_count) },
{ "hardtarget_lastlab_reject", "Last label value indicates no-train frames",
QN_ARG_BOOL, &(config.hardtarget_lastlab_reject) },
{ "window_extent", "Extent of all windows (frames)", QN_ARG_INT,
&(config.window_extent) },
{ "ftr1_window_offset", "Offset of window on ftr1_file (frames)",
QN_ARG_INT, &(config.ftr1_window_offset) },
{ "ftr2_window_offset", "Offset of window on ftr2_file (frames)",
QN_ARG_INT, &(config.ftr2_window_offset) },
{ "unary_window_offset", "Offset of window on unary_file (frames)",
QN_ARG_INT, &(config.unary_window_offset) },
{ "hardtarget_window_offset", "Offset of window on target label file (frames)",
QN_ARG_INT, &(config.hardtarget_window_offset) },
{ "softtarget_window_offset", "Offset of window on target feature file (frames)",
QN_ARG_INT, &(config.softtarget_window_offset) },
{ "ftr1_window_len", "Length of window on ftr1_file (frames)", QN_ARG_INT,
&(config.ftr1_window_len) },
{ "ftr2_window_len", "Length of window on ftr2_file (frames)", QN_ARG_INT,
&(config.ftr2_window_len) },
{ "ftr1_delta_order", "Order of derivatives added to ftr1_file", QN_ARG_INT,
&(config.ftr1_delta_order) },
{ "ftr1_delta_win", "Window size for ftr1_file delta-calculation", QN_ARG_INT,
&(config.ftr1_delta_win) },
{ "ftr1_norm_mode", "Normalization mode (file/utts/online)", QN_ARG_STR,
&(config.ftr1_norm_mode_str) },
{ "ftr1_norm_alpha_m", "Update constant for online norm means", QN_ARG_DOUBLE,
&(config.ftr1_norm_am) },
{ "ftr1_norm_alpha_v", "Update constant for online norm vars", QN_ARG_DOUBLE,
&(config.ftr1_norm_av) },
{ "ftr2_delta_order", "Order of derivatives added to ftr2_file", QN_ARG_INT,
&(config.ftr2_delta_order) },
{ "ftr2_delta_win", "Window size for ftr2_file delta-calculation", QN_ARG_INT,
&(config.ftr2_delta_win) },
{ "ftr2_norm_mode", "Normalization mode (file/utts/online)", QN_ARG_STR,
&(config.ftr2_norm_mode_str) },
{ "ftr2_norm_alpha_m", "Update constant for online norm means", QN_ARG_DOUBLE,
&(config.ftr2_norm_am) },
{ "ftr2_norm_alpha_v", "Update constant for online norm vars", QN_ARG_DOUBLE,
&(config.ftr2_norm_av) },
{ "train_cache_frames", "Number of training frames in cache",
QN_ARG_LONG, &(config.train_cache_frames) },
{ "train_cache_seed", "Training presentation randomization seed",
QN_ARG_INT, &(config.train_cache_seed) },
{ "train_sent_start", "Number of first training sentence",
QN_ARG_LONG, &(config.train_sent_start) },
{ "train_sent_count", "Number of training sentences",
QN_ARG_LONG, &(config.train_sent_count) },
{ "train_sent_range", "Training sentence indices in QN_Range(3) format",
QN_ARG_STR, &(config.train_sent_range) },
{ "cv_sent_start", "Number of first cross validation sentence",
QN_ARG_LONG, &(config.cv_sent_start) },
{ "cv_sent_count", "Number of cross validation sentences",
QN_ARG_LONG, &(config.cv_sent_count) },
{ "cv_sent_range", "Cross validation sentence indices in QN_Range(3) format",
QN_ARG_STR, &(config.cv_sent_range) },
{ "init_random_bias_min", "Minimum random bias (per layer)", QN_ARG_LIST_FLOAT,
&(config.init_random_bias_min) },
{ "init_random_bias_max", "Maximum random bias (per layer)", QN_ARG_LIST_FLOAT,
&(config.init_random_bias_max) },
{ "init_random_weight_min", "Minimum random weight (per layer)", QN_ARG_LIST_FLOAT,
&(config.init_random_weight_min) },
{ "init_random_weight_max", "Maximum random weight (per layer)", QN_ARG_LIST_FLOAT,
&(config.init_random_weight_max) },
{ "init_random_seed", "Net initialization random number seed",
QN_ARG_INT, &(config.init_random_seed) },
{ "init_weight_file", "Input weight file", QN_ARG_STR,
&(config.init_weight_file) },
{ "log_weight_file", "Log weight file", QN_ARG_STR,
&(config.log_weight_file) },
{ "out_weight_file", "Output weight file", QN_ARG_STR,
&(config.out_weight_file) },
{ "learnrate_schedule", "LR schedule type [newbob,list,smoothdecay]",
QN_ARG_STR, &(config.learnrate_schedule) },
{ "learnrate_vals", "Learning rates",
QN_ARG_LIST_FLOAT, &(config.learnrate_vals) },
{ "learnrate_epochs", "Maximum number of epochs", QN_ARG_LONG,
&(config.learnrate_epochs) },
{ "learnrate_scale", "Scale factor of successive learning rates", QN_ARG_FLOAT,
&(config.learnrate_scale) },
{ "unary_size", "Number of unary inputs to net",
QN_ARG_INT, &(config.unary_size)},
{ "mlp3_input_size", "Number of units in input layer",
QN_ARG_INT, &(config.mlp3_input_size)},
{ "mlp3_hidden_size","Number of units in hidden layer",
QN_ARG_INT, &(config.mlp3_hidden_size) },
{ "mlp3_output_size","Number of units in output layer",
QN_ARG_INT, &(config.mlp3_output_size) },
{ "mlp3_output_type","Type of non-linearity in MLP output layer [sigmoid,sigmoidx,softmax]",
QN_ARG_STR, &(config.mlp3_output_type) },
{ "mlp3_fx","NO LONGER USED",
QN_ARG_BOOL, &(config.mlp3_fx) },
{ "mlp3_weight_bits","NO LONGER USED",
QN_ARG_INT, &(config.mlp3_weight_bits) },
{ "mlp3_in2hid_exp","NO LONGER USED",
QN_ARG_INT, &(config.mlp3_in2hid_exp) },
{ "mlp3_hid2out_exp","NO LONGER USED",
QN_ARG_INT, &(config.mlp3_hid2out_exp) },
{ "mlp3_bunch_size","Size of bunches used in MLP training",
QN_ARG_INT, &(config.mlp3_bunch_size) },
{ "mlp3_blas","Use BLAS libraries",
QN_ARG_BOOL, &(config.mlp3_blas) },
{ "mlp3_pp","Use internal high-performance libraries",
QN_ARG_BOOL, &(config.mlp3_pp) },
{ "mlp3_threads","Number of threads in MLP object",
QN_ARG_INT, &(config.threads) },
{ "slaves","NO LONGER USED",
QN_ARG_INT, &(config.slaves) },
{ "cpu","NO LONGER USED",
QN_ARG_STR, &(config.cpu) },
{ "log_file", "File for status messages", QN_ARG_STR, &(config.log_file) },
{ "verbose", "Output extra status messages",
QN_ARG_BOOL, &(config.verbose) },
{ "debug", "Level of internal diagnostic output",
QN_ARG_INT, &(config.debug) },
{ NULL, NULL, QN_ARG_NOMOREARGS }
};
// QN_open_ftrstream, QN_open_ftrfile and QN_close_ftrfiles all moved to QN_utils.cc
// A function to create a train and cross validation stream for a given
// feature file. Also handles opening multiple files if
// stream comes from a sequence of files.
void
create_ftrstreams(int debug, const char* dbgname, char* filename,
const char* format, size_t width,
FILE* normfile, size_t first_ftr, size_t num_ftrs,
size_t train_sent_start, size_t train_sent_count,
char* train_sent_range,
size_t cv_sent_start, size_t cv_sent_count,
char* cv_sent_range,
size_t window_extent, size_t window_offset,
size_t window_len,
int delta_order, int delta_win,
int norm_mode, double norm_am, double norm_av,
size_t train_cache_frames, int train_cache_seed,
QN_InFtrStream** train_str_ptr, QN_InFtrStream** cv_str_ptr)
{
QN_InFtrStream* ftr_str = NULL; // Temporary stream holder.
int index = 1; // training always requires indexed
int buffer_frames = 500;
ftr_str = QN_build_ftrstream(debug, dbgname, filename, format,
width, index, normfile,
first_ftr, num_ftrs,
0, QN_ALL, // do utt selection ourselves
buffer_frames,
delta_order, delta_win,
norm_mode, norm_am, norm_av);
// Create training and cross-validation streams.
QN_InFtrStream_Cut* train_ftr_str = NULL;
QN_InFtrStream_Cut2* cv_ftr_str = NULL;
if (train_sent_range != 0) {
if ( !(train_sent_start == 0 && train_sent_count == QN_ALL) ) {
QN_ERROR("create_ftrstreams",
"You cannot specify train_sents by both range "
"and start/count.");
}
}
if (cv_sent_range != 0) {
if ( !(cv_sent_start == 0 && cv_sent_count == QN_ALL) ) {
QN_ERROR("create_ftrstreams",
"You cannot specify cv_sents by both range "
"and start/count.");
}
}
if ( (train_sent_range == 0 && cv_sent_range != 0) \
|| (train_sent_range != 0 && cv_sent_range == 0) ) {
QN_ERROR("create_ftrstreams",
"If you use ranges for one of train_sents or cv_sents, "
"you must use it for both.");
}
if (train_sent_range == 0) {
// Using old-style start & count, not range strings
QN_InFtrStream_Cut* fwd_ftr_str
= new QN_InFtrStream_Cut(debug, dbgname, *ftr_str,
train_sent_start,
train_sent_count,
cv_sent_start,
cv_sent_count);
train_ftr_str = (QN_InFtrStream_Cut*)fwd_ftr_str;
} else {
// Using range strings
QN_InFtrStream_CutRange* fwd_ftr_str
= new QN_InFtrStream_CutRange(debug, dbgname, *ftr_str,
train_sent_range,
cv_sent_range);
train_ftr_str = (QN_InFtrStream_Cut*)fwd_ftr_str;
}
cv_ftr_str = new QN_InFtrStream_Cut2(*train_ftr_str);
// Create training and CV windows.
size_t bot_margin = window_extent - window_offset - window_len;
QN_InFtrStream_RandWindow* train_winftr_str =
new QN_InFtrStream_RandWindow(debug, dbgname,
*train_ftr_str, window_len,
window_offset, bot_margin,
train_cache_frames, train_cache_seed
);
QN_InFtrStream_SeqWindow* cv_winftr_str =
new QN_InFtrStream_SeqWindow(debug, dbgname,
*cv_ftr_str, window_len,
window_offset, bot_margin
);
*train_str_ptr = train_winftr_str;
*cv_str_ptr = cv_winftr_str;
}
// A function to create a train and cross validation stream for a given
// label file.
void
create_labstreams(int debug, const char* dbgname, FILE* hardtarget_file,
const char* format, size_t width,
size_t train_sent_start, size_t train_sent_count,
char* train_sent_range,
size_t cv_sent_start, size_t cv_sent_count,
char* cv_sent_range,
size_t window_extent, size_t window_offset,
size_t train_cache_frames, int train_cache_seed,
QN_InLabStream** train_str_ptr, QN_InLabStream** cv_str_ptr)
{
QN_InLabStream* lab_str; // Temporary stream holder.
// Convert the file descriptor into a stream.
if (strcmp(format, "pfile")==0)
{
QN_InFtrLabStream_PFile* pfile_str =
new QN_InFtrLabStream_PFile(debug, // Select debugging.
dbgname, // Debugging tag.
hardtarget_file, // Label file.
1 // Indexed flag.
);
if (pfile_str->num_labs()!=1)
{
QN_ERROR("create_labstreams",
"Label file has %lu features, should only be 1.",
(unsigned long) pfile_str->num_labs() );
}
lab_str = pfile_str;
}
else if (strcmp(format, "pre")==0)
{
QN_InFtrLabStream_PreFile* prefile_str =
new QN_InFtrLabStream_PreFile(debug, // Select debugging.
dbgname, // Debugging tag.
hardtarget_file, // Label file.
width, // No of ftrs.
1 // Indexed flag.
);
lab_str = prefile_str;
}
else if (strcmp(format, "ilab")==0)
{
QN_InLabStream_ILab* ilab_str =
new QN_InLabStream_ILab(debug, // Select debugging.
dbgname, // Debugging tag.
hardtarget_file, // Label file.
1 // Indexed flag.
);
lab_str = ilab_str;
}
else
{
QN_ERROR(dbgname, "unknown label file format '%s'.", format);
lab_str = NULL;
}
// Create training and cross-validation streams.
QN_InLabStream_Cut* train_lab_str = NULL;
QN_InLabStream_Cut2* cv_lab_str = NULL;
if (train_sent_range != 0) {
if ( !(train_sent_start == 0 && train_sent_count == QN_ALL) ) {
QN_ERROR("create_labstreams",
"You cannot specify train_sents by both range "
"and start/count.");
}
}
if (cv_sent_range != 0) {
if ( !(cv_sent_start == 0 && cv_sent_count == QN_ALL) ) {
QN_ERROR("create_labstreams",
"You cannot specify cv_sents by both range "
"and start/count.");
}
}
if ( (train_sent_range == 0 && cv_sent_range != 0) \
|| (train_sent_range != 0 && cv_sent_range == 0) ) {
QN_ERROR("create_labstreams",
"If you use ranges for one of train_sents or cv_sents, "
"you must use it for both.");
}
if (train_sent_range == 0) {
// Using old-style start & count, not range strings
QN_InLabStream_Cut* fwd_lab_str
= new QN_InLabStream_Cut(debug, dbgname, *lab_str,
train_sent_start,
train_sent_count,
cv_sent_start,
cv_sent_count);
train_lab_str = (QN_InLabStream_Cut*)fwd_lab_str;
} else {
// Using range strings
QN_InLabStream_CutRange* fwd_lab_str
= new QN_InLabStream_CutRange(debug, dbgname, *lab_str,
train_sent_range,
cv_sent_range);
train_lab_str = (QN_InLabStream_Cut*)fwd_lab_str;
}
cv_lab_str = new QN_InLabStream_Cut2(*train_lab_str);
// Create training and CV windows.
const size_t window_len = 1;
size_t bot_margin = window_extent - window_offset - window_len;
QN_InLabStream_RandWindow* train_winlab_str =
new QN_InLabStream_RandWindow(debug, dbgname,
*train_lab_str, window_len,
window_offset, bot_margin,
train_cache_frames, train_cache_seed
);
QN_InLabStream_SeqWindow* cv_winlab_str =
new QN_InLabStream_SeqWindow(debug, dbgname,
*cv_lab_str, window_len,
window_offset, bot_margin
);
*train_str_ptr = train_winlab_str;
*cv_str_ptr = cv_winlab_str;
}
void
create_mlp(int debug, const char*,
size_t n_input, size_t n_hidden, size_t n_output,
const char* mlp3_output_type, int mlp3_bunch_size,
int threads, bool hasConf, QN_MLP** mlp_ptr)
{
// Create MLP and load weights.
QN_MLP* mlp3 = NULL;
QN_OutputLayerType outlayer_type;
if (strcmp(mlp3_output_type, "sigmoid")==0) {
outlayer_type = QN_OUTPUT_SIGMOID;
} else if (strcmp(mlp3_output_type, "sigmoidx")==0) {
outlayer_type = QN_OUTPUT_SIGMOID_XENTROPY;
} else if (strcmp(mlp3_output_type, "softmax")==0) {
outlayer_type = QN_OUTPUT_SOFTMAX;
} else {
QN_ERROR("create_mlp", "unknown output unit type '%s'.",
mlp3_output_type);
outlayer_type = QN_OUTPUT_SIGMOID;
}
if (mlp3_bunch_size == 0) {
assert(!hasConf); // confidences not implemented
// NOT bunch
if (config.threads==1)
{
mlp3 = new QN_MLP_OnlineFl3(debug, "train",
n_input, n_hidden, n_output,
outlayer_type);
}
else
{
QN_ERROR("create_mlp", "threads must be 1 for online "
"training.");
}
} else {
// Bunch
if (threads>1)
{
#ifdef QN_HAVE_LIBPTHREAD
if (threads>mlp3_bunch_size)
{
QN_ERROR("create_mlp", "number of threads must "
"be less than the bunch size.");
}
else
{
// Bunch threaded
assert(!hasConf); // confidences not implemented
mlp3 = new QN_MLP_ThreadFl3(debug, "train",
n_input, n_hidden,
n_output,
outlayer_type,
mlp3_bunch_size,
threads);
}
#else
QN_ERROR("create_mlp",
"cannot use multiple threads as libpthread "
"was not linked with this executable.");
#endif
}
else if (threads==1)
{
// Bunch unthreaded
mlp3 = new QN_MLP_BunchFl3(debug, "train",
n_input, n_hidden,
n_output, outlayer_type,
mlp3_bunch_size);
}
else
{
QN_ERROR("create_mlp","threads must be >= 1.");
}
}
*mlp_ptr = mlp3;
}
void
create_learnrate_schedule(int, const char*,
const char* learnrate_schedule,
float* learnrate_vals,
size_t learnrate_count,
float learnrate_scale,
size_t learnrate_epochs,
QN_RateSchedule** lr_schedule)
{
QN_RateSchedule* rate_sched;
if (learnrate_scale>1.0)
{
QN_ERROR("create_learnrate_schedule", "Learning rate scale is %g, but "
"it should be less that 1.0.");
}
if (strcmp(learnrate_schedule, "newbob")==0)
{
rate_sched = new QN_RateSchedule_NewBoB(*learnrate_vals,
learnrate_scale,
0.5f, 0.5f,
100.0f,learnrate_epochs);
}
else if (strcmp(learnrate_schedule, "list")==0)
{
long count;
if (learnrate_epochs < learnrate_count)
count = learnrate_epochs;
else
count = learnrate_count;
rate_sched = new QN_RateSchedule_List(learnrate_vals, count);
}
else if (strcmp(learnrate_schedule, "smoothdecay")==0)
{
size_t search_epochs;
if (learnrate_count<3 || learnrate_count>4) {
QN_ERROR(NULL,"learnrate_vals should have 3 or 4 values if learnrate_schedule is smoothdecay");
}
if (learnrate_count==4) {
search_epochs=(size_t)learnrate_vals[3];
} else {
search_epochs=1;
}
QN_OUTPUT("Setting up smooth decay learning rate (lr=%.6f,decay=%.6f,stopcriterion=%.6f",learnrate_vals[0],learnrate_vals[1],learnrate_vals[2]);
rate_sched = new QN_RateSchedule_SmoothDecay(learnrate_vals[0],
learnrate_vals[1],
learnrate_vals[2],
search_epochs,
100.0f, 0,
learnrate_epochs);
}
else
{
QN_ERROR("create_learnrate_schedule",
"Unknown learning rate schedule '%s'.",
learnrate_schedule);
rate_sched = NULL;
}
*lr_schedule = rate_sched;
}
void
qnstrn()
{
int verbose = config.verbose;
time_t now;
time(&now);
// A note for the logfile, including some system info.
QN_output_sysinfo("qnstrn");
QN_OUTPUT("Program start: %.24s.", ctime(&now));
// Open files and provisionally check arguments.
if (verbose>0)
{
QN_OUTPUT("Opening feature file...");
}
// ftr files are now opened inside create_ftrstreams in order to
// accommodate multiple pasted-together files
// ftr1_file.
// enum { FTRFILE1_BUF_SIZE = 0x8000 };
// const char* ftr1_file = config.ftr1_file;
// FILE* ftr1_fp = QN_open(ftr1_file, "r");
// ftr2_file.
// enum { FTRFILE2_BUF_SIZE = 0x8000 };
// const char* ftr2_file = config.ftr2_file;
// FILE* ftr2_fp = NULL;
// char* ftr2_buf = NULL;
// if (strcmp(ftr2_file, "")!=0)
// {
// ftr2_fp = QN_open(ftr2_file, "r");
// }
bool hasConf=strlen(config.ftr1_conf_file)>0;
if(hasConf)
assert(strcmp(config.ftr1_format, "pfile")==0); // only implemented for pfiles
// unary_file.
enum { UNARYFILE_BUF_SIZE = 0x8000 };
const char* unary_file = config.unary_file;
FILE* unary_fp = NULL;
char* unary_buf = NULL;
if (strcmp(unary_file, "")!=0)
{
assert(!hasConf);
unary_fp = QN_open(unary_file, "r");
unary_buf = new char[UNARYFILE_BUF_SIZE];
assert(setvbuf(unary_fp, unary_buf, _IOFBF,
UNARYFILE_BUF_SIZE)==0);
}
const char* hardtarget_file = config.hardtarget_file;
const char* softtarget_file = config.softtarget_file;
FILE* hardtarget_fp = NULL;
// FILE* softtarget_fp = NULL;
char* hardtarget_buf = NULL;
// char* softtarget_buf = NULL;
int lastlab_reject = config.hardtarget_lastlab_reject;
if (strcmp(hardtarget_file, "")!=0 && strcmp(softtarget_file, "")==0)
{
// hardtarget_file.
enum { LABFILE_BUF_SIZE = 0x8000 };
hardtarget_fp = QN_open(hardtarget_file, "r");
hardtarget_buf = new char[LABFILE_BUF_SIZE];
assert(setvbuf(hardtarget_fp, hardtarget_buf, _IOFBF,
LABFILE_BUF_SIZE)==0);
}
else if (strcmp(hardtarget_file, "")==0 && strcmp(softtarget_file, "")!=0)
{
// opened within create_ftrstream
// softtarget_file.
// enum { LABFILE_BUF_SIZE = 0x8000 };
// softtarget_fp = QN_open(softtarget_file, "r");
// softtarget_buf = new char[LABFILE_BUF_SIZE];
if (lastlab_reject)
{
QN_ERROR(NULL, "hardtarget_lastlab_reject cannot be true if no "
"hardtarget_file is specified");
}
}
else
{
QN_ERROR(NULL, "must specify one and only one of hardtarget_file "
"and softtarget_file");
}
// ftr1_norm_file.
FILE* ftr1_norm_fp = NULL;
const char* ftr1_norm_file = config.ftr1_norm_file;
if (strcmp(ftr1_norm_file, "")!=0)
{
ftr1_norm_fp = QN_open(ftr1_norm_file, "r");
}
// ftr2_norm_file.
FILE* ftr2_norm_fp = NULL;
const char* ftr2_norm_file = config.ftr2_norm_file;
if (strcmp(ftr2_norm_file, "")!=0)
{
if (strcmp(config.ftr2_file, "")==0)
QN_ERROR(NULL, "ftr2_norm_file is specified but ftr2_file "
"is not.");
else if (config.ftr2_ftr_count==0)
QN_ERROR(NULL, "ftr2_norm_file is specified but ftr2_ftr_count "
"is 0.");
else
ftr2_norm_fp = QN_open(ftr2_norm_file, "r");
}
// Weight files.
FILE* init_weight_fp = NULL;
const char* init_weight_file = config.init_weight_file;
if (strcmp(init_weight_file, "")!=0)
{
init_weight_fp = QN_open(init_weight_file, "r");
}
FILE* out_weight_fp = NULL;
const char* out_weight_file = config.out_weight_file;
out_weight_fp = QN_open(out_weight_file, "w");
// Windowing.
int window_extent = config.window_extent;
if (window_extent<0 || window_extent>1000)
{
QN_ERROR(NULL, "window_extent must be in range 0-1000.");
}
int ftr1_window_offset = config.ftr1_window_offset;
if (ftr1_window_offset<0 || ftr1_window_offset>=window_extent)
{
QN_ERROR(NULL, "ftr1_window_offset must be less than "
" window_extent.");
}
int ftr1_window_len = config.ftr1_window_len;
if (ftr1_window_len<=0)
{
QN_ERROR(NULL, "ftr1_window_len must be greater than 0.");
}
if ((ftr1_window_offset + ftr1_window_len) > window_extent)
{
QN_ERROR(NULL, "ftr1_window_offset+ftr1_window_len must be "
"less than window_extent.");
}
int ftr2_window_offset = config.ftr2_window_offset;
int ftr2_window_len = config.ftr2_window_len;
// don't test ftr2_window_offset unless we have a file
if (strcmp(config.ftr2_file, "")!= 0 && config.ftr2_ftr_count > 0) {
if (ftr2_window_offset<0 || ftr2_window_offset>=window_extent)
{
QN_ERROR(NULL, "ftr2_window_offset must be less than "
" window_extent.");
}
if (ftr2_window_len<0)
{
QN_ERROR(NULL, "ftr2_window_len must be positive.");
}
if ((ftr2_window_offset + ftr2_window_len) > window_extent)
{
QN_ERROR(NULL, "ftr2_window_offset+ftr2_window_len must be "
"less than window_extent.");
}
}
// Don't worry about the unary_window_offset unless there is actually
// a unary_file (default value of 3 causes error for window_extent=1)
int unary_window_offset = config.unary_window_offset;
if ( (strcmp(unary_file, "")!=0) \
&& (unary_window_offset<0 || unary_window_offset>=window_extent))
{
QN_ERROR(NULL, "unary_window_offset must be less than "
" window_extent.");
}
int hardtarget_window_offset = config.hardtarget_window_offset;
if (hardtarget_window_offset<0 || hardtarget_window_offset>=window_extent)
{
QN_ERROR(NULL, "hardtarget_window_offset must be less than "
" window_extent.");
}
int softtarget_window_offset = config.softtarget_window_offset;
if (softtarget_window_offset<0 || softtarget_window_offset>=window_extent)
{
QN_ERROR(NULL, "softtarget_window_offset must be less than "
" window_extent.");
}
// Check for overlapping training and CV ranges.
size_t train_sent_start = config.train_sent_start;
size_t train_sent_count = (config.train_sent_count==INT_MAX) ?
(size_t) QN_ALL : config.train_sent_count;
size_t last_train_sent = (train_sent_count==QN_ALL) ?
INT_MAX : train_sent_start + train_sent_count - 1;
char* train_sent_range = config.train_sent_range;
size_t cv_sent_start = config.cv_sent_start;
size_t cv_sent_count = (config.cv_sent_count==INT_MAX) ?
(size_t) QN_ALL : config.cv_sent_count;
char* cv_sent_range = config.cv_sent_range;
size_t last_cv_sent = (cv_sent_count==QN_ALL) ?
INT_MAX : cv_sent_start + cv_sent_count - 1;
if (train_sent_range == 0 && cv_sent_range == 0 &&
((cv_sent_start>=train_sent_start && cv_sent_start<=last_train_sent)
|| (last_cv_sent>=train_sent_start && last_cv_sent<=last_train_sent)))
{
QN_WARN(NULL, "training and cv sentence ranges overlap.");
}
// Check for mlp3_input_size consistency.
size_t ftr1_ftr_start = config.ftr1_ftr_start;
size_t ftr2_ftr_start = config.ftr2_ftr_start;
size_t ftr1_ftr_count = config.ftr1_ftr_count;
size_t ftr2_ftr_count = config.ftr2_ftr_count;
size_t unary_size = config.unary_size;
size_t ftrfile_num_input = ftr1_ftr_count * ftr1_window_len
+ ftr2_ftr_count * ftr2_window_len + unary_size;
size_t mlp3_input_size = config.mlp3_input_size;
size_t mlp3_hidden_size = config.mlp3_hidden_size;
size_t mlp3_output_size = config.mlp3_output_size;
if (ftrfile_num_input!=mlp3_input_size)
{
QN_ERROR(NULL, "number of inputs to the net %d does not equal width"
" of data stream from feature files %d.", mlp3_input_size,
ftrfile_num_input);
}
// Sentence and randomization details.
long train_cache_frames = config.train_cache_frames;
int train_cache_seed = config.train_cache_seed;
if (train_cache_frames<1000)
{
QN_ERROR(NULL, "train_cache_frames must be greater than 1000.");
}
int init_random_seed = config.init_random_seed;
int debug = config.debug;
// Do ftr1_file stream creation.
QN_InFtrStream* ftr1_train_str = NULL;
QN_InFtrStream* ftr1_cv_str = NULL;
create_ftrstreams(debug, "ftr1_file", config.ftr1_file,
config.ftr1_format, config.ftr1_width,
ftr1_norm_fp,
ftr1_ftr_start, ftr1_ftr_count,
train_sent_start, train_sent_count,
train_sent_range,
cv_sent_start, cv_sent_count,
cv_sent_range,
window_extent,
ftr1_window_offset, ftr1_window_len,
config.ftr1_delta_order, config.ftr1_delta_win,
config.ftr1_norm_mode,
config.ftr1_norm_am, config.ftr1_norm_av,
train_cache_frames, train_cache_seed,
&ftr1_train_str, &ftr1_cv_str);
// Confidences for ftr1_train (must be same format, size as ftr1)
QN_InFtrStream* ftrfile_conf_train_str = NULL;
QN_InFtrStream* ftrfile_conf_cv_str = NULL;
if(hasConf) {
create_ftrstreams(debug, "ftr1_conf_file", config.ftr1_conf_file,
config.ftr1_format, 0, // width=0 allows conf_dim==1
NULL, // prevent normalization
ftr1_ftr_start, 0, // count==0 allows conf_dim==1
train_sent_start, train_sent_count,
train_sent_range,
cv_sent_start, cv_sent_count,
cv_sent_range,
window_extent,
ftr1_window_offset, ftr1_window_len,
config.ftr1_delta_order, config.ftr1_delta_win,
config.ftr1_norm_mode,
config.ftr1_norm_am, config.ftr1_norm_av,
train_cache_frames, train_cache_seed,
&ftrfile_conf_train_str, &ftrfile_conf_cv_str);
}
// Do ftr2_file stream creation.
QN_InFtrStream* ftr2_train_str = NULL;
QN_InFtrStream* ftr2_cv_str = NULL;
if (strcmp(config.ftr2_file, "")!=0)
{
assert(!hasConf); // confs not implemented for ftr2
if (config.ftr2_ftr_count==0)
QN_WARN(NULL, "ftr2_file is set but ftr2_ftr_count is 0.");
create_ftrstreams(debug, "ftr2_file", config.ftr2_file,
config.ftr2_format, config.ftr2_width,
ftr2_norm_fp,
ftr2_ftr_start, ftr2_ftr_count,
train_sent_start, train_sent_count,
train_sent_range,
cv_sent_start, cv_sent_count,
cv_sent_range,
window_extent,
ftr2_window_offset, ftr2_window_len,
config.ftr2_delta_order, config.ftr2_delta_win,
config.ftr2_norm_mode,
config.ftr2_norm_am, config.ftr2_norm_av,
train_cache_frames, train_cache_seed,
&ftr2_train_str, &ftr2_cv_str);
}
// Merge the two training feature streams.
QN_InFtrStream* ftrfile_train_str;
QN_InFtrStream* ftrfile_cv_str;
if (ftr2_train_str!=NULL)
{
assert(ftr2_cv_str!=NULL);
ftrfile_train_str = new QN_InFtrStream_JoinFtrs(debug, "train_ftrfile",
*ftr1_train_str,
*ftr2_train_str);
ftrfile_cv_str = new QN_InFtrStream_JoinFtrs(debug, "cv_ftrfile",
*ftr1_cv_str,
*ftr2_cv_str);
}
else
{
assert(ftr2_cv_str==NULL);
assert(ftr2_train_str==NULL);
ftrfile_train_str = ftr1_train_str;
ftrfile_cv_str = ftr1_cv_str;
}
// If necessary, add the unary input feature.
if (unary_fp!=NULL)
{
assert(!hasConf); // confs not implemented for this
QN_InLabStream* unary_train_str = NULL;
QN_InLabStream* unary_cv_str = NULL;
create_labstreams(debug, "unary", unary_fp,
"pfile", 0,
train_sent_start, train_sent_count,
train_sent_range,
cv_sent_start, cv_sent_count,
cv_sent_range,
window_extent,
unary_window_offset,
train_cache_frames, train_cache_seed,
&unary_train_str, &unary_cv_str);
// Convert the unary input label into a feature stream.
QN_InFtrStream* unaryftr_train_str = NULL;
QN_InFtrStream* unaryftr_cv_str = NULL;
unaryftr_train_str = new QN_InFtrStream_OneHot(debug,
"train_unaryfile",
*unary_train_str,
unary_size);
unaryftr_cv_str = new QN_InFtrStream_OneHot(debug,
"cv_unaryfile",
*unary_cv_str,
unary_size);
// Merge in the feature streams.
ftrfile_train_str = new QN_InFtrStream_JoinFtrs(debug,
"train_unaryfile",
*ftrfile_train_str,
*unaryftr_train_str);
ftrfile_cv_str = new QN_InFtrStream_JoinFtrs(debug, "cv_unaryfile",
*ftrfile_cv_str,
*unaryftr_cv_str);
}
QN_InLabStream* hardtarget_train_str = NULL;
QN_InLabStream* hardtarget_cv_str = NULL;
QN_InFtrStream* softtarget_train_str = NULL;
QN_InFtrStream* softtarget_cv_str = NULL;
// Does config.ftr1_file refer to just a single file?
int ftr1_onefile = 1;
if (strchr(config.ftr1_file, ',') != NULL) {
// filename looks like a comma-separated list
ftr1_onefile = 0;
// won't try to run pathcmp on it.
}
if (hardtarget_fp!=NULL)
{
// Do hardtarget stream creation.
// Handle formats where we need to know the number of ftrs to
// extract the labels.
// A bit of a hack!!
size_t hardtarget_width;
if (ftr1_onefile && QN_pathcmp(config.ftr1_file, hardtarget_file)==0)
hardtarget_width = config.ftr1_width;
else
hardtarget_width = 0;
char* hardtarget_format = config.hardtarget_format;
if (strcmp(hardtarget_format, "")==0)
hardtarget_format = config.ftr1_format;
create_labstreams(debug, "hardtarget", hardtarget_fp,
hardtarget_format, hardtarget_width,
train_sent_start, train_sent_count,
train_sent_range,
cv_sent_start, cv_sent_count,
cv_sent_range,
window_extent,
hardtarget_window_offset,
train_cache_frames, train_cache_seed,
&hardtarget_train_str, &hardtarget_cv_str);
}
else if (strcmp(softtarget_file,"")!=0)
{
assert(!hasConf); // confs not implemented for this
size_t softtarget_width = config.softtarget_width;
char* softtarget_format = config.softtarget_format;
if (strcmp(softtarget_format, "")==0)
softtarget_format = config.ftr1_format;
create_ftrstreams(debug, "softtarget", (char *)softtarget_file,
softtarget_format, softtarget_width,
NULL,
0, 0,
train_sent_start, train_sent_count,
train_sent_range,
cv_sent_start, cv_sent_count,
cv_sent_range,
window_extent,
softtarget_window_offset, 1,
0, 0, 0, /* no deltas or per-utt normalization */
0.0, 0.0,
train_cache_frames, train_cache_seed,
&softtarget_train_str, &softtarget_cv_str);
}
else
assert(0);
// Create the MLP.
QN_MLP* mlp;
create_mlp(debug, "mlp",
mlp3_input_size,mlp3_hidden_size,
mlp3_output_size,config.mlp3_output_type,
config.mlp3_bunch_size, config.threads,hasConf,
&mlp);
// Create the leaning rate schedule.
QN_RateSchedule* lr_schedule;
create_learnrate_schedule(debug, "learnrate",
config.learnrate_schedule,
config.learnrate_vals.vals,
config.learnrate_vals.count,
config.learnrate_scale,
config.learnrate_epochs,
&lr_schedule);
// A weight file of "" means randomize.
if (init_weight_fp==NULL)
{
if (verbose>0)
{
QN_OUTPUT("Randomizing weights...");
}
if (config.init_random_weight_min.count<1 ||
config.init_random_weight_min.count>2 ||
config.init_random_weight_max.count<1 ||
config.init_random_weight_max.count>2 ||
config.init_random_bias_min.count<1 ||
config.init_random_bias_min.count>2 ||
config.init_random_bias_max.count<1 ||
config.init_random_bias_max.count>2) {
QN_ERROR(NULL,"weight/bias list initializations must either have 1 or 2 elements");
}
float in2hid_min = config.init_random_weight_min.vals[0];
float in2hid_max = config.init_random_weight_max.vals[0];
float hidbias_min = config.init_random_bias_min.vals[0];
float hidbias_max = config.init_random_bias_max.vals[0];
/* if initialization lists have 1 member, use for both layer 1 and 2
if 2 members, use separate initializations */
float hid2out_min = config.init_random_weight_min.vals[(config.init_random_weight_min.count==1)?0:1];
float hid2out_max = config.init_random_weight_max.vals[(config.init_random_weight_max.count==1)?0:1];
float outbias_min = config.init_random_bias_min.vals[(config.init_random_bias_min.count==1)?0:1];
float outbias_max = config.init_random_bias_max.vals[(config.init_random_bias_max.count==1)?0:1];
QN_randomize_weights(debug, init_random_seed, *mlp,
in2hid_min, in2hid_max,
hidbias_min, hidbias_max,
hid2out_min, hid2out_max,
outbias_min, outbias_max);
if (verbose>0)
{
QN_OUTPUT("Randomized weights.");
}
}
else
{
float min, max;
if (verbose>0)
{
QN_OUTPUT("Loading weights...");
}
QN_MLPWeightFile_RAP3 inwfile(debug, init_weight_fp,
QN_READ,
init_weight_file,
mlp3_input_size, mlp3_hidden_size,
mlp3_output_size);
QN_read_weights(inwfile, *mlp, &min, &max, debug);
QN_OUTPUT("Weights loaded from file, min=%g max=%g.",
min, max);
}
const char* log_weight_file = config.log_weight_file;
size_t train_chunk_size; // The number of presentations read
// at one time.
size_t mlp3_bunch_size = config.mlp3_bunch_size;
if (mlp3_bunch_size>1)
{
train_chunk_size = mlp3_bunch_size;
}
else
train_chunk_size = 16; // By default, use a size of 16.
if (hardtarget_train_str!=NULL)
{
assert(hardtarget_cv_str!=NULL);
QN_HardSentTrainer* trainer =
new QN_HardSentTrainer(debug, // Debugging level.
"trainer", // Debugging tag.
verbose, // Verbosity level.
mlp, // MLP.
ftrfile_train_str, // Training ftr strm.
hardtarget_train_str, // Training label str.
ftrfile_cv_str, // CV feature stream.
hardtarget_cv_str, // CV label stream.
ftrfile_conf_train_str, // Train conf ftr strm.
ftrfile_conf_cv_str, // CV conf ftr stream.
lr_schedule, // Learning rate scheduler.
0.0, // Low target.
1.0, // High target.
log_weight_file, // Where we log weights.
train_chunk_size, // Batch size.
lastlab_reject // Allow untrainable frames
);
trainer->train();
delete trainer;
}
else
{
assert(softtarget_train_str!=NULL);
assert(softtarget_cv_str!=NULL);
assert(!hasConf); // confs not implemented for this
QN_SoftSentTrainer* trainer =
new QN_SoftSentTrainer(debug, // Debugging level.
"trainer", // Debugging tag.
verbose, // Verbosity level.
mlp, // MLP.
ftrfile_train_str, // Training ftr strm.
softtarget_train_str, // Training label str.
ftrfile_cv_str, // CV feature stream.
softtarget_cv_str, // CV label stream.
lr_schedule, // Learning rate scheduler.
0.0, // Low target.
1.0, // High target.
log_weight_file, // Where we log weights.
train_chunk_size // Batch size.
);
trainer->train();
delete trainer;
}
if (verbose>0)
{
QN_OUTPUT("Starting to write weights...");
}
float min, max;
QN_MLPWeightFile_RAP3 outwfile(debug, out_weight_fp, QN_WRITE,
out_weight_file,
mlp3_input_size, mlp3_hidden_size,
mlp3_output_size);
QN_write_weights(outwfile, *mlp, &min, &max, debug);
QN_OUTPUT("Weights written to '%s'.", out_weight_file);
// A note for the logfile.
time(&now);
QN_OUTPUT("Program stop: %.24s", ctime(&now));
delete mlp;
if (out_weight_fp!=NULL)
QN_close(out_weight_fp);
if (init_weight_fp!=NULL)
QN_close(init_weight_fp);
if (ftr2_norm_fp!=NULL)
QN_close(ftr2_norm_fp);
if (ftr1_norm_fp!=NULL)
QN_close(ftr1_norm_fp);
// if (softtarget_fp!=NULL)
// {
// QN_close(softtarget_fp);
// delete softtarget_buf;
// }
if (hardtarget_fp!=NULL)
{
QN_close(hardtarget_fp);
delete [] hardtarget_buf;
}
if (unary_fp!=NULL)
{
QN_close(unary_fp);
delete unary_buf;
}
// if (ftr2_fp!=NULL)
// {
// QN_close(ftr2_fp);
// delete ftr2_buf;
// }
// QN_close(ftr1_fp);
// delete ftr1_buf;
QN_close_ftrfiles();
}
int
main(int argc, const char* argv[])
{
char* progname; // The name of the prog - set by QN_initargs.
FILE* log_fp;
char log_buf[160];
set_defaults();
QN_initargs(&argtab[0], &argc, &argv, &progname);
// map norm_mode_str to val
config.ftr1_norm_mode = QN_string_to_norm_const(config.ftr1_norm_mode_str);
config.ftr2_norm_mode = QN_string_to_norm_const(config.ftr2_norm_mode_str);
// Seed the random number generator.
srand48(config.init_random_seed);
log_fp = QN_open(config.log_file, "w");
assert(setvbuf(log_fp, log_buf, _IOLBF, sizeof(log_buf))==0);
QN_printargs(log_fp, progname, &argtab[0]);
QN_logger = new QN_Logger_Simple(log_fp, stderr, progname);
// Install our own out-of-memory handler if possible.
#ifdef QN_HAVE_SET_NEW_HANDLER
set_new_handler(QN_new_handler);
#endif
// Set the math mode
qn_math = config.mlp3_pp ? QN_MATH_PP : QN_MATH_NV;
#ifdef QN_HAVE_LIBBLAS
qn_math |= config.mlp3_blas ? QN_MATH_BL : 0;
#else
if (config.mlp3_blas)
{
QN_ERROR(NULL, "cannot enable BLAS library as none is linked with the "
"executable.");
}
#endif // #ifdef QN_HAVE_LIBBLAS
qnstrn();
exit(EXIT_SUCCESS);
}
[-- Attachment #3: Type: text/plain, Size: 204 bytes --]
_______________________________________________
Emacs-orgmode mailing list
Remember: use `Reply All' to send replies to the list.
Emacs-orgmode@gnu.org
http://lists.gnu.org/mailman/listinfo/emacs-orgmode
^ permalink raw reply [flat|nested] 7+ messages in thread
* Re: bug in org-store-link
2008-02-26 20:40 bug in org-store-link Scott Otterson
@ 2008-02-27 14:55 ` Carsten Dominik
2008-02-27 16:20 ` Nick Dokos
2008-02-27 19:05 ` Scott Otterson
0 siblings, 2 replies; 7+ messages in thread
From: Carsten Dominik @ 2008-02-27 14:55 UTC (permalink / raw)
To: Scott Otterson; +Cc: emacs-orgmode
Hi Scott, this is not a small bug, but a problem that is really hard
to solve.
Supposed I used the exact line text to search, then you still have two
lines in the buffer
that would match.
This is really about what strategy should be used to find a location
in a file that has possibly changed.
I have no good answer to that. Do you?
- Carsten
On Feb 26, 2008, at 9:40 PM, Scott Otterson wrote:
> Small bug in org store link. To reproduce, put the cursor on line
> 1007 and run org-store-link. Then use the result to create a
> hyperlink in an org file, which for me looks like:
>
> [[file:~/lib/c/pkgs/quicknet/qnstrn.cc::ftr1_window_offset
> %20ftr1_window_len][call]]
>
> Then click on that hyperlink. I get sent to line 899 instead of
> line 1007.
> It looks like the reason is that org tosses out puncutation (+>,).
> I've found that, when linking to source code, punctuation is a big
> deal, so, if possible, it would be nice if org mode was made
> sensitive to it.
>
> Keep up the good work,
>
> Scott
>
> #ifndef NO_RCSID
> const char* qnstrn_rcsid =
> "$Header: /homes/scotto/lib/cvsroot/lib/c/pkgs/quicknet/
> qnstrn.cc,v 1.5 2007/01/24 00:07:46 scotto Exp $";
> #endif
>
> #include <QN_config.h>
> #include <assert.h>
> #include <float.h>
> #include <stdio.h>
> #include <stdlib.h>
> #include <string.h>
> #include <time.h>
> #ifdef QN_HAVE_LIMITS_H
> #include <limits.h>
> #endif
> #ifndef EXIT_SUCCESS
> #define EXIT_SUCCESS (0)
> #define EXIT_FAILURE (1)
> #endif
> #include <sys/types.h>
> #ifdef QN_HAVE_SYS_TIME_H
> #include <sys/time.h>
> #endif
> #ifdef QN_HAVE_SYS_PARAM_H
> #include <sys/param.h>
> #endif
> #include <unistd.h>
>
> #if !QN_HAVE_DECL_SRAND48
> extern "C" {
> void srand48(long);
> }
> #endif
>
> #ifdef QN_HAVE_SET_NEW_HANDLER
> extern "C" {
> typedef void (*new_handler)(void);
> new_handler set_new_handler (new_handler);
> }
> #endif
>
>
> #ifndef FILENAME_MAX
> #define FILENAME_MAX (MAXPATHLEN)
> #endif
>
> #include "QuickNet.h"
>
> static struct {
> char* ftr1_file;
> char* ftr1_format;
> int ftr1_width;
> char* ftr1_conf_file;
> char* ftr2_file;
> char* ftr2_format;
> int ftr2_width;
> char* unary_file;
> char* hardtarget_file;
> char* hardtarget_format;
> char* softtarget_file;
> char* softtarget_format;
> int softtarget_width;
> char* ftr1_norm_file;
> char* ftr2_norm_file;
> int ftr1_ftr_start;
> int ftr2_ftr_start;
> int ftr1_ftr_count;
> int ftr2_ftr_count;
> int hardtarget_lastlab_reject;
> int window_extent;
> int ftr1_window_offset;
> int ftr2_window_offset;
> int unary_window_offset;
> int hardtarget_window_offset;
> int softtarget_window_offset;
> int ftr1_window_len;
> int ftr2_window_len;
> int ftr1_delta_order;
> int ftr1_delta_win;
> char* ftr1_norm_mode_str;
> int ftr1_norm_mode;
> double ftr1_norm_am;
> double ftr1_norm_av;
> int ftr2_delta_order;
> int ftr2_delta_win;
> char* ftr2_norm_mode_str;
> int ftr2_norm_mode;
> double ftr2_norm_am;
> double ftr2_norm_av;
> long train_cache_frames;
> int train_cache_seed;
> long train_sent_start;
> long train_sent_count;
> char* train_sent_range;
> long cv_sent_start;
> long cv_sent_count;
> char* cv_sent_range;
>
> QN_Arg_ListFloat init_random_bias_min;
> QN_Arg_ListFloat init_random_bias_max;
>
> QN_Arg_ListFloat init_random_weight_min;
> QN_Arg_ListFloat init_random_weight_max;
>
> int init_random_seed;
> char* init_weight_file;
> char* log_weight_file;
> char* out_weight_file;
> char* learnrate_schedule;
> QN_Arg_ListFloat learnrate_vals;
> long learnrate_epochs;
> float learnrate_scale;
> int unary_size;
> int mlp3_input_size;
> int mlp3_hidden_size;
> int mlp3_output_size;
> char* mlp3_output_type;
> int mlp3_fx; // NO LONGER USED
> int mlp3_weight_bits; // NO LONGER USED
> int mlp3_in2hid_exp; // NO LONGER USED
> int mlp3_hid2out_exp; // NO LONGER USED
> int mlp3_bunch_size;
> int mlp3_blas;
> int mlp3_pp;
> int threads;
> int slaves; // NO LONGER USED
> char *cpu; // NO LONGER USED
> char* log_file; // Stream for storing status messages.
> int verbose;
> int debug; // Debug level.
> } config;
>
> static void
> set_defaults(void)
> {
> static float default_learnrate[1] = { 0.008 };
> static float default_bias_min[1] = { -4.1 };
> static float default_bias_max[1] = { -3.9 };
> static float default_weight_min[1] = { -0.1 };
> static float default_weight_max[1] = { 0.1 };
>
> config.ftr1_file = "";
> config.ftr1_format = "pfile";
> config.ftr1_width = 0;
> config.ftr1_conf_file = "";
> config.ftr2_file = "";
> config.ftr2_format = "pfile";
> config.ftr2_width = 0;
> config.unary_file = "";
> config.hardtarget_file = "";
> config.hardtarget_format = "";
> config.softtarget_file = "";
> config.softtarget_format = "pfile";
> config.softtarget_width = 0;
> config.ftr1_norm_file = "";
> config.ftr2_norm_file = "";
> config.ftr1_ftr_start = 0;
> config.ftr2_ftr_start = 0;
> config.ftr1_ftr_count = 0;
> config.ftr2_ftr_count = 0;
> config.hardtarget_lastlab_reject = 0;
> config.window_extent = 9;
> config.ftr1_window_offset = 0;
> config.ftr2_window_offset = 4;
> config.unary_window_offset = 3;
> config.hardtarget_window_offset = 0;
> config.softtarget_window_offset = 0;
> config.ftr1_window_len = 9;
> config.ftr2_window_len = 0;
> config.ftr1_delta_order = 0;
> config.ftr1_delta_win = 9;
> config.ftr1_norm_mode_str = NULL;
> config.ftr1_norm_mode = QN_NORM_FILE;
> config.ftr1_norm_am = QN_DFLT_NORM_AM;
> config.ftr1_norm_av = QN_DFLT_NORM_AV;
> config.ftr2_delta_order = 0;
> config.ftr2_delta_win = 9;
> config.ftr2_norm_mode_str = NULL;
> config.ftr2_norm_mode = QN_NORM_FILE;
> config.ftr2_norm_am = QN_DFLT_NORM_AM;
> config.ftr2_norm_av = QN_DFLT_NORM_AV;
> config.train_cache_frames = 10000;
> config.train_cache_seed = 0;
> config.train_sent_start = 0;
> config.train_sent_count = INT_MAX;
> config.train_sent_range = 0;
> config.cv_sent_start = 0;
> config.cv_sent_count = INT_MAX;
> config.cv_sent_range = 0;
>
> config.init_random_bias_min.count = 1;
> config.init_random_bias_min.vals = &default_bias_min[0];
> config.init_random_bias_max.count = 1;
> config.init_random_bias_max.vals = &default_bias_max[0];
>
> config.init_random_weight_min.count = 1;
> config.init_random_weight_min.vals = &default_weight_min[0];
> config.init_random_weight_max.count = 1;
> config.init_random_weight_max.vals = &default_weight_max[0];
>
> config.init_random_seed = 0;
> config.init_weight_file = "";
> config.log_weight_file = "log%p.weights";
> config.out_weight_file = "out.weights";
> config.learnrate_schedule = "newbob";
> config.learnrate_vals.count = 1;
> config.learnrate_vals.vals = &default_learnrate[0];
> config.learnrate_epochs = 9999;
> config.learnrate_scale = 0.5;
> config.unary_size = 0;
> config.mlp3_input_size = 153;
> config.mlp3_hidden_size = 200;
> config.mlp3_output_size = 56;
> config.mlp3_output_type = "softmax";
> config.mlp3_fx = 0;
> config.mlp3_weight_bits = 32;
> config.mlp3_in2hid_exp = 2;
> config.mlp3_hid2out_exp = 2;
> config.mlp3_bunch_size = 16;
> #ifdef QN_HAVE_LIBBLAS
> config.mlp3_blas = 1;
> #else
> config.mlp3_blas = 0;
> #endif
> config.mlp3_pp = 1;
> config.threads = 1;
> config.slaves = 0;
> config.cpu = "host";
> config.log_file = "-";
> config.verbose = 0;
> config.debug = 0;
> }
>
> QN_ArgEntry argtab[] =
> {
> { NULL, "QuickNet MLP training program version " QN_VERSION,
> QN_ARG_DESC },
> { "ftr1_file", "Input feature file", QN_ARG_STR,
> &(config.ftr1_file), QN_ARG_REQ },
> { "ftr1_format", "Main feature file format
> [pfile,pre,lna,onlftr,srifile,srilist]", QN_ARG_STR,
> &(config.ftr1_format) },
> { "ftr1_width", "Main feature file feature columns", QN_ARG_INT,
> &(config.ftr1_width) },
> { "ftr1_conf_file", "Confidences for ftr1. Format and number of
> frames matches ftr1. If confidence dimension is 1, then the weight
> will be applied across all elements in a feature frame; otherwise,
> the dimension must match ft1. ftr2 confs not implemented",
> QN_ARG_STR, &(config.ftr1_conf_file) },
> { "ftr2_file", "Second input feature file", QN_ARG_STR,
> &(config.ftr2_file) },
> { "ftr2_format","Secondary feature file format
> [pfile,pre,lna,onlftr,srifile,srilist]", QN_ARG_STR,
> &(config.ftr2_format) },
> { "ftr2_width", "Secondary feature file feature columns", QN_ARG_INT,
> &(config.ftr2_width) },
> { "unary_file", "Auxilliary unary file", QN_ARG_STR,
> &(config.unary_file) },
> { "hardtarget_file", "Target label file", QN_ARG_STR,
> &(config.hardtarget_file) },
> { "hardtarget_format", "Target label file format [pfile,pre,ilab]",
> QN_ARG_STR,
> &(config.hardtarget_format) },
> { "softtarget_file", "Target feature file", QN_ARG_STR,
> &(config.softtarget_file) },
> { "softtarget_format", "Target feature file format
> [pfile,pre,lna,onlftr]", QN_ARG_STR,
> &(config.softtarget_format) },
> { "softtarget_width", "Target feature file feature columns",
> QN_ARG_INT,
> &(config.softtarget_width) },
> { "ftr1_norm_file", "Normalization parameters for ftr1_file",
> QN_ARG_STR,
> &(config.ftr1_norm_file) },
> { "ftr2_norm_file", "Normalization parameters for ftr2_file",
> QN_ARG_STR,
> &(config.ftr2_norm_file) },
> { "ftr1_ftr_start", "First feature used from ftr1_file",
> QN_ARG_INT, &(config.ftr1_ftr_start) },
> { "ftr2_ftr_start", "First feature used from ftr2_file",
> QN_ARG_INT, &(config.ftr2_ftr_start) },
> { "ftr1_ftr_count", "Number of features used from ftr1_file",
> QN_ARG_INT, &(config.ftr1_ftr_count) },
> { "ftr2_ftr_count", "Number of features used from ftr2_file",
> QN_ARG_INT, &(config.ftr2_ftr_count) },
> { "hardtarget_lastlab_reject", "Last label value indicates no-train
> frames",
> QN_ARG_BOOL, &(config.hardtarget_lastlab_reject) },
> { "window_extent", "Extent of all windows (frames)", QN_ARG_INT,
> &(config.window_extent) },
> { "ftr1_window_offset", "Offset of window on ftr1_file (frames)",
> QN_ARG_INT, &(config.ftr1_window_offset) },
> { "ftr2_window_offset", "Offset of window on ftr2_file (frames)",
> QN_ARG_INT, &(config.ftr2_window_offset) },
> { "unary_window_offset", "Offset of window on unary_file (frames)",
> QN_ARG_INT, &(config.unary_window_offset) },
> { "hardtarget_window_offset", "Offset of window on target label file
> (frames)",
> QN_ARG_INT, &(config.hardtarget_window_offset) },
> { "softtarget_window_offset", "Offset of window on target feature
> file (frames)",
> QN_ARG_INT, &(config.softtarget_window_offset) },
> { "ftr1_window_len", "Length of window on ftr1_file (frames)",
> QN_ARG_INT,
> &(config.ftr1_window_len) },
> { "ftr2_window_len", "Length of window on ftr2_file (frames)",
> QN_ARG_INT,
> &(config.ftr2_window_len) },
> { "ftr1_delta_order", "Order of derivatives added to ftr1_file",
> QN_ARG_INT,
> &(config.ftr1_delta_order) },
> { "ftr1_delta_win", "Window size for ftr1_file delta-calculation",
> QN_ARG_INT,
> &(config.ftr1_delta_win) },
> { "ftr1_norm_mode", "Normalization mode (file/utts/online)",
> QN_ARG_STR,
> &(config.ftr1_norm_mode_str) },
> { "ftr1_norm_alpha_m", "Update constant for online norm means",
> QN_ARG_DOUBLE,
> &(config.ftr1_norm_am) },
> { "ftr1_norm_alpha_v", "Update constant for online norm vars",
> QN_ARG_DOUBLE,
> &(config.ftr1_norm_av) },
> { "ftr2_delta_order", "Order of derivatives added to ftr2_file",
> QN_ARG_INT,
> &(config.ftr2_delta_order) },
> { "ftr2_delta_win", "Window size for ftr2_file delta-calculation",
> QN_ARG_INT,
> &(config.ftr2_delta_win) },
> { "ftr2_norm_mode", "Normalization mode (file/utts/online)",
> QN_ARG_STR,
> &(config.ftr2_norm_mode_str) },
> { "ftr2_norm_alpha_m", "Update constant for online norm means",
> QN_ARG_DOUBLE,
> &(config.ftr2_norm_am) },
> { "ftr2_norm_alpha_v", "Update constant for online norm vars",
> QN_ARG_DOUBLE,
> &(config.ftr2_norm_av) },
> { "train_cache_frames", "Number of training frames in cache",
> QN_ARG_LONG, &(config.train_cache_frames) },
> { "train_cache_seed", "Training presentation randomization seed",
> QN_ARG_INT, &(config.train_cache_seed) },
> { "train_sent_start", "Number of first training sentence",
> QN_ARG_LONG, &(config.train_sent_start) },
> { "train_sent_count", "Number of training sentences",
> QN_ARG_LONG, &(config.train_sent_count) },
> { "train_sent_range", "Training sentence indices in QN_Range(3)
> format",
> QN_ARG_STR, &(config.train_sent_range) },
> { "cv_sent_start", "Number of first cross validation sentence",
> QN_ARG_LONG, &(config.cv_sent_start) },
> { "cv_sent_count", "Number of cross validation sentences",
> QN_ARG_LONG, &(config.cv_sent_count) },
> { "cv_sent_range", "Cross validation sentence indices in QN_Range(3)
> format",
> QN_ARG_STR, &(config.cv_sent_range) },
> { "init_random_bias_min", "Minimum random bias (per layer)",
> QN_ARG_LIST_FLOAT,
> &(config.init_random_bias_min) },
> { "init_random_bias_max", "Maximum random bias (per layer)",
> QN_ARG_LIST_FLOAT,
> &(config.init_random_bias_max) },
> { "init_random_weight_min", "Minimum random weight (per layer)",
> QN_ARG_LIST_FLOAT,
> &(config.init_random_weight_min) },
> { "init_random_weight_max", "Maximum random weight (per layer)",
> QN_ARG_LIST_FLOAT,
> &(config.init_random_weight_max) },
> { "init_random_seed", "Net initialization random number seed",
> QN_ARG_INT, &(config.init_random_seed) },
> { "init_weight_file", "Input weight file", QN_ARG_STR,
> &(config.init_weight_file) },
> { "log_weight_file", "Log weight file", QN_ARG_STR,
> &(config.log_weight_file) },
> { "out_weight_file", "Output weight file", QN_ARG_STR,
> &(config.out_weight_file) },
> { "learnrate_schedule", "LR schedule type [newbob,list,smoothdecay]",
> QN_ARG_STR, &(config.learnrate_schedule) },
> { "learnrate_vals", "Learning rates",
> QN_ARG_LIST_FLOAT, &(config.learnrate_vals) },
> { "learnrate_epochs", "Maximum number of epochs", QN_ARG_LONG,
> &(config.learnrate_epochs) },
> { "learnrate_scale", "Scale factor of successive learning rates",
> QN_ARG_FLOAT,
> &(config.learnrate_scale) },
> { "unary_size", "Number of unary inputs to net",
> QN_ARG_INT, &(config.unary_size)},
> { "mlp3_input_size", "Number of units in input layer",
> QN_ARG_INT, &(config.mlp3_input_size)},
> { "mlp3_hidden_size","Number of units in hidden layer",
> QN_ARG_INT, &(config.mlp3_hidden_size) },
> { "mlp3_output_size","Number of units in output layer",
> QN_ARG_INT, &(config.mlp3_output_size) },
> { "mlp3_output_type","Type of non-linearity in MLP output layer
> [sigmoid,sigmoidx,softmax]",
> QN_ARG_STR, &(config.mlp3_output_type) },
> { "mlp3_fx","NO LONGER USED",
> QN_ARG_BOOL, &(config.mlp3_fx) },
> { "mlp3_weight_bits","NO LONGER USED",
> QN_ARG_INT, &(config.mlp3_weight_bits) },
> { "mlp3_in2hid_exp","NO LONGER USED",
> QN_ARG_INT, &(config.mlp3_in2hid_exp) },
> { "mlp3_hid2out_exp","NO LONGER USED",
> QN_ARG_INT, &(config.mlp3_hid2out_exp) },
> { "mlp3_bunch_size","Size of bunches used in MLP training",
> QN_ARG_INT, &(config.mlp3_bunch_size) },
> { "mlp3_blas","Use BLAS libraries",
> QN_ARG_BOOL, &(config.mlp3_blas) },
> { "mlp3_pp","Use internal high-performance libraries",
> QN_ARG_BOOL, &(config.mlp3_pp) },
> { "mlp3_threads","Number of threads in MLP object",
> QN_ARG_INT, &(config.threads) },
> { "slaves","NO LONGER USED",
> QN_ARG_INT, &(config.slaves) },
> { "cpu","NO LONGER USED",
> QN_ARG_STR, &(config.cpu) },
> { "log_file", "File for status messages", QN_ARG_STR,
> &(config.log_file) },
> { "verbose", "Output extra status messages",
> QN_ARG_BOOL, &(config.verbose) },
> { "debug", "Level of internal diagnostic output",
> QN_ARG_INT, &(config.debug) },
> { NULL, NULL, QN_ARG_NOMOREARGS }
> };
>
> // QN_open_ftrstream, QN_open_ftrfile and QN_close_ftrfiles all
> moved to QN_utils.cc
>
> // A function to create a train and cross validation stream for a
> given
> // feature file. Also handles opening multiple files if
> // stream comes from a sequence of files.
>
> void
> create_ftrstreams(int debug, const char* dbgname, char* filename,
> const char* format, size_t width,
> FILE* normfile, size_t first_ftr, size_t num_ftrs,
> size_t train_sent_start, size_t train_sent_count,
> char* train_sent_range,
> size_t cv_sent_start, size_t cv_sent_count,
> char* cv_sent_range,
> size_t window_extent, size_t window_offset,
> size_t window_len,
> int delta_order, int delta_win,
> int norm_mode, double norm_am, double norm_av,
> size_t train_cache_frames, int train_cache_seed,
> QN_InFtrStream** train_str_ptr, QN_InFtrStream** cv_str_ptr)
> {
> QN_InFtrStream* ftr_str = NULL; // Temporary stream holder.
> int index = 1; // training always requires indexed
> int buffer_frames = 500;
>
> ftr_str = QN_build_ftrstream(debug, dbgname, filename, format,
> width, index, normfile,
> first_ftr, num_ftrs,
> 0, QN_ALL, // do utt selection ourselves
> buffer_frames,
> delta_order, delta_win,
> norm_mode, norm_am, norm_av);
>
> // Create training and cross-validation streams.
> QN_InFtrStream_Cut* train_ftr_str = NULL;
> QN_InFtrStream_Cut2* cv_ftr_str = NULL;
>
> if (train_sent_range != 0) {
> if ( !(train_sent_start == 0 && train_sent_count == QN_ALL) ) {
> QN_ERROR("create_ftrstreams",
> "You cannot specify train_sents by both range "
> "and start/count.");
> }
> }
>
> if (cv_sent_range != 0) {
> if ( !(cv_sent_start == 0 && cv_sent_count == QN_ALL) ) {
> QN_ERROR("create_ftrstreams",
> "You cannot specify cv_sents by both range "
> "and start/count.");
> }
> }
>
> if ( (train_sent_range == 0 && cv_sent_range != 0) \
> || (train_sent_range != 0 && cv_sent_range == 0) ) {
> QN_ERROR("create_ftrstreams",
> "If you use ranges for one of train_sents or cv_sents, "
> "you must use it for both.");
> }
>
> if (train_sent_range == 0) {
> // Using old-style start & count, not range strings
> QN_InFtrStream_Cut* fwd_ftr_str
> = new QN_InFtrStream_Cut(debug, dbgname, *ftr_str,
> train_sent_start,
> train_sent_count,
> cv_sent_start,
> cv_sent_count);
> train_ftr_str = (QN_InFtrStream_Cut*)fwd_ftr_str;
> } else {
> // Using range strings
> QN_InFtrStream_CutRange* fwd_ftr_str
> = new QN_InFtrStream_CutRange(debug, dbgname, *ftr_str,
> train_sent_range,
> cv_sent_range);
> train_ftr_str = (QN_InFtrStream_Cut*)fwd_ftr_str;
> }
> cv_ftr_str = new QN_InFtrStream_Cut2(*train_ftr_str);
>
> // Create training and CV windows.
> size_t bot_margin = window_extent - window_offset - window_len;
> QN_InFtrStream_RandWindow* train_winftr_str =
> new QN_InFtrStream_RandWindow(debug, dbgname,
> *train_ftr_str, window_len,
> window_offset, bot_margin,
> train_cache_frames, train_cache_seed
> );
> QN_InFtrStream_SeqWindow* cv_winftr_str =
> new QN_InFtrStream_SeqWindow(debug, dbgname,
> *cv_ftr_str, window_len,
> window_offset, bot_margin
> );
> *train_str_ptr = train_winftr_str;
> *cv_str_ptr = cv_winftr_str;
> }
>
> // A function to create a train and cross validation stream for a
> given
> // label file.
>
> void
> create_labstreams(int debug, const char* dbgname, FILE*
> hardtarget_file,
> const char* format, size_t width,
> size_t train_sent_start, size_t train_sent_count,
> char* train_sent_range,
> size_t cv_sent_start, size_t cv_sent_count,
> char* cv_sent_range,
> size_t window_extent, size_t window_offset,
> size_t train_cache_frames, int train_cache_seed,
> QN_InLabStream** train_str_ptr, QN_InLabStream** cv_str_ptr)
> {
> QN_InLabStream* lab_str; // Temporary stream holder.
>
> // Convert the file descriptor into a stream.
> if (strcmp(format, "pfile")==0)
> {
> QN_InFtrLabStream_PFile* pfile_str =
> new QN_InFtrLabStream_PFile(debug, // Select debugging.
> dbgname, // Debugging tag.
> hardtarget_file, // Label file.
> 1 // Indexed flag.
> );
> if (pfile_str->num_labs()!=1)
> {
> QN_ERROR("create_labstreams",
> "Label file has %lu features, should only be 1.",
> (unsigned long) pfile_str->num_labs() );
> }
> lab_str = pfile_str;
> }
> else if (strcmp(format, "pre")==0)
> {
> QN_InFtrLabStream_PreFile* prefile_str =
> new QN_InFtrLabStream_PreFile(debug, // Select debugging.
> dbgname, // Debugging tag.
> hardtarget_file, // Label file.
> width, // No of ftrs.
> 1 // Indexed flag.
> );
> lab_str = prefile_str;
> }
> else if (strcmp(format, "ilab")==0)
> {
> QN_InLabStream_ILab* ilab_str =
> new QN_InLabStream_ILab(debug, // Select debugging.
> dbgname, // Debugging tag.
> hardtarget_file, // Label file.
> 1 // Indexed flag.
> );
> lab_str = ilab_str;
> }
> else
> {
> QN_ERROR(dbgname, "unknown label file format '%s'.", format);
> lab_str = NULL;
> }
>
>
> // Create training and cross-validation streams.
> QN_InLabStream_Cut* train_lab_str = NULL;
> QN_InLabStream_Cut2* cv_lab_str = NULL;
> if (train_sent_range != 0) {
> if ( !(train_sent_start == 0 && train_sent_count == QN_ALL) ) {
> QN_ERROR("create_labstreams",
> "You cannot specify train_sents by both range "
> "and start/count.");
> }
> }
>
> if (cv_sent_range != 0) {
> if ( !(cv_sent_start == 0 && cv_sent_count == QN_ALL) ) {
> QN_ERROR("create_labstreams",
> "You cannot specify cv_sents by both range "
> "and start/count.");
> }
> }
>
> if ( (train_sent_range == 0 && cv_sent_range != 0) \
> || (train_sent_range != 0 && cv_sent_range == 0) ) {
> QN_ERROR("create_labstreams",
> "If you use ranges for one of train_sents or cv_sents, "
> "you must use it for both.");
> }
>
> if (train_sent_range == 0) {
> // Using old-style start & count, not range strings
> QN_InLabStream_Cut* fwd_lab_str
> = new QN_InLabStream_Cut(debug, dbgname, *lab_str,
> train_sent_start,
> train_sent_count,
> cv_sent_start,
> cv_sent_count);
> train_lab_str = (QN_InLabStream_Cut*)fwd_lab_str;
> } else {
> // Using range strings
> QN_InLabStream_CutRange* fwd_lab_str
> = new QN_InLabStream_CutRange(debug, dbgname, *lab_str,
> train_sent_range,
> cv_sent_range);
> train_lab_str = (QN_InLabStream_Cut*)fwd_lab_str;
> }
> cv_lab_str = new QN_InLabStream_Cut2(*train_lab_str);
>
> // Create training and CV windows.
>
> const size_t window_len = 1;
> size_t bot_margin = window_extent - window_offset - window_len;
> QN_InLabStream_RandWindow* train_winlab_str =
> new QN_InLabStream_RandWindow(debug, dbgname,
> *train_lab_str, window_len,
> window_offset, bot_margin,
> train_cache_frames, train_cache_seed
> );
> QN_InLabStream_SeqWindow* cv_winlab_str =
> new QN_InLabStream_SeqWindow(debug, dbgname,
> *cv_lab_str, window_len,
> window_offset, bot_margin
> );
> *train_str_ptr = train_winlab_str;
> *cv_str_ptr = cv_winlab_str;
> }
>
> void
> create_mlp(int debug, const char*,
> size_t n_input, size_t n_hidden, size_t n_output,
> const char* mlp3_output_type, int mlp3_bunch_size,
> int threads, bool hasConf, QN_MLP** mlp_ptr)
> {
> // Create MLP and load weights.
> QN_MLP* mlp3 = NULL;
>
> QN_OutputLayerType outlayer_type;
> if (strcmp(mlp3_output_type, "sigmoid")==0) {
> outlayer_type = QN_OUTPUT_SIGMOID;
> } else if (strcmp(mlp3_output_type, "sigmoidx")==0) {
> outlayer_type = QN_OUTPUT_SIGMOID_XENTROPY;
> } else if (strcmp(mlp3_output_type, "softmax")==0) {
> outlayer_type = QN_OUTPUT_SOFTMAX;
> } else {
> QN_ERROR("create_mlp", "unknown output unit type '%s'.",
> mlp3_output_type);
> outlayer_type = QN_OUTPUT_SIGMOID;
> }
>
>
> if (mlp3_bunch_size == 0) {
> assert(!hasConf); // confidences not implemented
> // NOT bunch
> if (config.threads==1)
> {
> mlp3 = new QN_MLP_OnlineFl3(debug, "train",
> n_input, n_hidden, n_output,
> outlayer_type);
> }
> else
> {
> QN_ERROR("create_mlp", "threads must be 1 for online "
> "training.");
> }
> } else {
> // Bunch
> if (threads>1)
> {
> #ifdef QN_HAVE_LIBPTHREAD
> if (threads>mlp3_bunch_size)
> {
> QN_ERROR("create_mlp", "number of threads must "
> "be less than the bunch size.");
> }
> else
> {
> // Bunch threaded
> assert(!hasConf); // confidences not implemented
>
> mlp3 = new QN_MLP_ThreadFl3(debug, "train",
> n_input, n_hidden,
> n_output,
> outlayer_type,
> mlp3_bunch_size,
> threads);
> }
> #else
> QN_ERROR("create_mlp",
> "cannot use multiple threads as libpthread "
> "was not linked with this executable.");
> #endif
> }
> else if (threads==1)
> {
> // Bunch unthreaded
> mlp3 = new QN_MLP_BunchFl3(debug, "train",
> n_input, n_hidden,
> n_output, outlayer_type,
> mlp3_bunch_size);
> }
> else
> {
> QN_ERROR("create_mlp","threads must be >= 1.");
> }
> }
> *mlp_ptr = mlp3;
> }
>
> void
> create_learnrate_schedule(int, const char*,
> const char* learnrate_schedule,
> float* learnrate_vals,
> size_t learnrate_count,
> float learnrate_scale,
> size_t learnrate_epochs,
> QN_RateSchedule** lr_schedule)
> {
> QN_RateSchedule* rate_sched;
> if (learnrate_scale>1.0)
> {
> QN_ERROR("create_learnrate_schedule", "Learning rate scale is %g,
> but "
> "it should be less that 1.0.");
> }
> if (strcmp(learnrate_schedule, "newbob")==0)
> {
> rate_sched = new QN_RateSchedule_NewBoB(*learnrate_vals,
> learnrate_scale,
> 0.5f, 0.5f,
> 100.0f,learnrate_epochs);
> }
> else if (strcmp(learnrate_schedule, "list")==0)
> {
> long count;
>
> if (learnrate_epochs < learnrate_count)
> count = learnrate_epochs;
> else
> count = learnrate_count;
> rate_sched = new QN_RateSchedule_List(learnrate_vals, count);
> }
> else if (strcmp(learnrate_schedule, "smoothdecay")==0)
> {
> size_t search_epochs;
>
> if (learnrate_count<3 || learnrate_count>4) {
> QN_ERROR(NULL,"learnrate_vals should have 3 or 4 values if
> learnrate_schedule is smoothdecay");
> }
>
> if (learnrate_count==4) {
> search_epochs=(size_t)learnrate_vals[3];
> } else {
> search_epochs=1;
> }
>
> QN_OUTPUT("Setting up smooth decay learning rate (lr=%.6f,decay=%.
> 6f,stopcriterion=%.
> 6f",learnrate_vals[0],learnrate_vals[1],learnrate_vals[2]);
> rate_sched = new QN_RateSchedule_SmoothDecay(learnrate_vals[0],
> learnrate_vals[1],
> learnrate_vals[2],
> search_epochs,
> 100.0f, 0,
> learnrate_epochs);
> }
> else
> {
> QN_ERROR("create_learnrate_schedule",
> "Unknown learning rate schedule '%s'.",
> learnrate_schedule);
> rate_sched = NULL;
> }
> *lr_schedule = rate_sched;
> }
>
> void
> qnstrn()
> {
> int verbose = config.verbose;
> time_t now;
>
> time(&now);
>
> // A note for the logfile, including some system info.
> QN_output_sysinfo("qnstrn");
> QN_OUTPUT("Program start: %.24s.", ctime(&now));
>
> // Open files and provisionally check arguments.
> if (verbose>0)
> {
> QN_OUTPUT("Opening feature file...");
> }
>
> // ftr files are now opened inside create_ftrstreams in order to
> // accommodate multiple pasted-together files
>
> // ftr1_file.
> // enum { FTRFILE1_BUF_SIZE = 0x8000 };
> // const char* ftr1_file = config.ftr1_file;
> // FILE* ftr1_fp = QN_open(ftr1_file, "r");
>
> // ftr2_file.
> // enum { FTRFILE2_BUF_SIZE = 0x8000 };
> // const char* ftr2_file = config.ftr2_file;
> // FILE* ftr2_fp = NULL;
> // char* ftr2_buf = NULL;
> // if (strcmp(ftr2_file, "")!=0)
> // {
> // ftr2_fp = QN_open(ftr2_file, "r");
> // }
>
> bool hasConf=strlen(config.ftr1_conf_file)>0;
> if(hasConf)
> assert(strcmp(config.ftr1_format, "pfile")==0); // only
> implemented for pfiles
>
> // unary_file.
> enum { UNARYFILE_BUF_SIZE = 0x8000 };
> const char* unary_file = config.unary_file;
> FILE* unary_fp = NULL;
> char* unary_buf = NULL;
> if (strcmp(unary_file, "")!=0)
> {
> assert(!hasConf);
> unary_fp = QN_open(unary_file, "r");
> unary_buf = new char[UNARYFILE_BUF_SIZE];
> assert(setvbuf(unary_fp, unary_buf, _IOFBF,
> UNARYFILE_BUF_SIZE)==0);
> }
>
> const char* hardtarget_file = config.hardtarget_file;
> const char* softtarget_file = config.softtarget_file;
> FILE* hardtarget_fp = NULL;
> // FILE* softtarget_fp = NULL;
> char* hardtarget_buf = NULL;
> // char* softtarget_buf = NULL;
> int lastlab_reject = config.hardtarget_lastlab_reject;
> if (strcmp(hardtarget_file, "")!=0 && strcmp(softtarget_file,
> "")==0)
> {
> // hardtarget_file.
> enum { LABFILE_BUF_SIZE = 0x8000 };
> hardtarget_fp = QN_open(hardtarget_file, "r");
> hardtarget_buf = new char[LABFILE_BUF_SIZE];
> assert(setvbuf(hardtarget_fp, hardtarget_buf, _IOFBF,
> LABFILE_BUF_SIZE)==0);
> }
> else if (strcmp(hardtarget_file, "")==0 &&
> strcmp(softtarget_file, "")!=0)
> {
> // opened within create_ftrstream
>
> // softtarget_file.
> // enum { LABFILE_BUF_SIZE = 0x8000 };
> // softtarget_fp = QN_open(softtarget_file, "r");
> // softtarget_buf = new char[LABFILE_BUF_SIZE];
> if (lastlab_reject)
> {
> QN_ERROR(NULL, "hardtarget_lastlab_reject cannot be true if no "
> "hardtarget_file is specified");
> }
> }
> else
> {
> QN_ERROR(NULL, "must specify one and only one of hardtarget_file "
> "and softtarget_file");
> }
>
>
> // ftr1_norm_file.
> FILE* ftr1_norm_fp = NULL;
> const char* ftr1_norm_file = config.ftr1_norm_file;
> if (strcmp(ftr1_norm_file, "")!=0)
> {
> ftr1_norm_fp = QN_open(ftr1_norm_file, "r");
> }
>
> // ftr2_norm_file.
> FILE* ftr2_norm_fp = NULL;
> const char* ftr2_norm_file = config.ftr2_norm_file;
> if (strcmp(ftr2_norm_file, "")!=0)
> {
> if (strcmp(config.ftr2_file, "")==0)
> QN_ERROR(NULL, "ftr2_norm_file is specified but ftr2_file "
> "is not.");
> else if (config.ftr2_ftr_count==0)
> QN_ERROR(NULL, "ftr2_norm_file is specified but ftr2_ftr_count "
> "is 0.");
> else
> ftr2_norm_fp = QN_open(ftr2_norm_file, "r");
> }
>
> // Weight files.
> FILE* init_weight_fp = NULL;
> const char* init_weight_file = config.init_weight_file;
> if (strcmp(init_weight_file, "")!=0)
> {
> init_weight_fp = QN_open(init_weight_file, "r");
> }
> FILE* out_weight_fp = NULL;
> const char* out_weight_file = config.out_weight_file;
> out_weight_fp = QN_open(out_weight_file, "w");
>
> // Windowing.
> int window_extent = config.window_extent;
> if (window_extent<0 || window_extent>1000)
> {
> QN_ERROR(NULL, "window_extent must be in range 0-1000.");
> }
> int ftr1_window_offset = config.ftr1_window_offset;
> if (ftr1_window_offset<0 || ftr1_window_offset>=window_extent)
> {
> QN_ERROR(NULL, "ftr1_window_offset must be less than "
> " window_extent.");
> }
> int ftr1_window_len = config.ftr1_window_len;
> if (ftr1_window_len<=0)
> {
> QN_ERROR(NULL, "ftr1_window_len must be greater than 0.");
> }
> if ((ftr1_window_offset + ftr1_window_len) > window_extent)
> {
> QN_ERROR(NULL, "ftr1_window_offset+ftr1_window_len must be "
> "less than window_extent.");
> }
> int ftr2_window_offset = config.ftr2_window_offset;
> int ftr2_window_len = config.ftr2_window_len;
> // don't test ftr2_window_offset unless we have a file
> if (strcmp(config.ftr2_file, "")!= 0 && config.ftr2_ftr_count >
> 0) {
> if (ftr2_window_offset<0 || ftr2_window_offset>=window_extent)
> {
> QN_ERROR(NULL, "ftr2_window_offset must be less than "
> " window_extent.");
> }
> if (ftr2_window_len<0)
> {
> QN_ERROR(NULL, "ftr2_window_len must be positive.");
> }
> if ((ftr2_window_offset + ftr2_window_len) > window_extent)
> {
> QN_ERROR(NULL, "ftr2_window_offset+ftr2_window_len must be "
> "less than window_extent.");
> }
> }
> // Don't worry about the unary_window_offset unless there is
> actually
> // a unary_file (default value of 3 causes error for
> window_extent=1)
> int unary_window_offset = config.unary_window_offset;
> if ( (strcmp(unary_file, "")!=0) \
> && (unary_window_offset<0 ||
> unary_window_offset>=window_extent))
> {
> QN_ERROR(NULL, "unary_window_offset must be less than "
> " window_extent.");
> }
> int hardtarget_window_offset = config.hardtarget_window_offset;
> if (hardtarget_window_offset<0 ||
> hardtarget_window_offset>=window_extent)
> {
> QN_ERROR(NULL, "hardtarget_window_offset must be less than "
> " window_extent.");
> }
> int softtarget_window_offset = config.softtarget_window_offset;
> if (softtarget_window_offset<0 ||
> softtarget_window_offset>=window_extent)
> {
> QN_ERROR(NULL, "softtarget_window_offset must be less than "
> " window_extent.");
> }
>
> // Check for overlapping training and CV ranges.
> size_t train_sent_start = config.train_sent_start;
> size_t train_sent_count = (config.train_sent_count==INT_MAX) ?
> (size_t) QN_ALL : config.train_sent_count;
> size_t last_train_sent = (train_sent_count==QN_ALL) ?
> INT_MAX : train_sent_start + train_sent_count - 1;
> char* train_sent_range = config.train_sent_range;
> size_t cv_sent_start = config.cv_sent_start;
> size_t cv_sent_count = (config.cv_sent_count==INT_MAX) ?
> (size_t) QN_ALL : config.cv_sent_count;
> char* cv_sent_range = config.cv_sent_range;
> size_t last_cv_sent = (cv_sent_count==QN_ALL) ?
> INT_MAX : cv_sent_start + cv_sent_count - 1;
> if (train_sent_range == 0 && cv_sent_range == 0 &&
> ((cv_sent_start>=train_sent_start && cv_sent_start<=last_train_sent)
> || (last_cv_sent>=train_sent_start &&
> last_cv_sent<=last_train_sent)))
> {
> QN_WARN(NULL, "training and cv sentence ranges overlap.");
> }
>
> // Check for mlp3_input_size consistency.
> size_t ftr1_ftr_start = config.ftr1_ftr_start;
> size_t ftr2_ftr_start = config.ftr2_ftr_start;
> size_t ftr1_ftr_count = config.ftr1_ftr_count;
> size_t ftr2_ftr_count = config.ftr2_ftr_count;
> size_t unary_size = config.unary_size;
> size_t ftrfile_num_input = ftr1_ftr_count * ftr1_window_len
> + ftr2_ftr_count * ftr2_window_len + unary_size;
> size_t mlp3_input_size = config.mlp3_input_size;
> size_t mlp3_hidden_size = config.mlp3_hidden_size;
> size_t mlp3_output_size = config.mlp3_output_size;
> if (ftrfile_num_input!=mlp3_input_size)
> {
> QN_ERROR(NULL, "number of inputs to the net %d does not equal width"
> " of data stream from feature files %d.", mlp3_input_size,
> ftrfile_num_input);
> }
>
> // Sentence and randomization details.
> long train_cache_frames = config.train_cache_frames;
> int train_cache_seed = config.train_cache_seed;
> if (train_cache_frames<1000)
> {
> QN_ERROR(NULL, "train_cache_frames must be greater than 1000.");
> }
>
>
> int init_random_seed = config.init_random_seed;
> int debug = config.debug;
>
> // Do ftr1_file stream creation.
> QN_InFtrStream* ftr1_train_str = NULL;
> QN_InFtrStream* ftr1_cv_str = NULL;
> create_ftrstreams(debug, "ftr1_file", config.ftr1_file,
> config.ftr1_format, config.ftr1_width,
> ftr1_norm_fp,
> ftr1_ftr_start, ftr1_ftr_count,
> train_sent_start, train_sent_count,
> train_sent_range,
> cv_sent_start, cv_sent_count,
> cv_sent_range,
> window_extent,
> ftr1_window_offset, ftr1_window_len,
> config.ftr1_delta_order, config.ftr1_delta_win,
> config.ftr1_norm_mode,
> config.ftr1_norm_am, config.ftr1_norm_av,
> train_cache_frames, train_cache_seed,
> &ftr1_train_str, &ftr1_cv_str);
>
> // Confidences for ftr1_train (must be same format, size as ftr1)
> QN_InFtrStream* ftrfile_conf_train_str = NULL;
> QN_InFtrStream* ftrfile_conf_cv_str = NULL;
> if(hasConf) {
> create_ftrstreams(debug, "ftr1_conf_file", config.ftr1_conf_file,
> config.ftr1_format, 0, // width=0 allows
> conf_dim==1
> NULL, // prevent normalization
> ftr1_ftr_start, 0, // count==0 allows
> conf_dim==1
> train_sent_start, train_sent_count,
> train_sent_range,
> cv_sent_start, cv_sent_count,
> cv_sent_range,
> window_extent,
> ftr1_window_offset, ftr1_window_len,
> config.ftr1_delta_order, config.ftr1_delta_win,
> config.ftr1_norm_mode,
> config.ftr1_norm_am, config.ftr1_norm_av,
> train_cache_frames, train_cache_seed,
> &ftrfile_conf_train_str, &ftrfile_conf_cv_str);
> }
>
> // Do ftr2_file stream creation.
> QN_InFtrStream* ftr2_train_str = NULL;
> QN_InFtrStream* ftr2_cv_str = NULL;
> if (strcmp(config.ftr2_file, "")!=0)
> {
> assert(!hasConf); // confs not implemented for ftr2
>
> if (config.ftr2_ftr_count==0)
> QN_WARN(NULL, "ftr2_file is set but ftr2_ftr_count is 0.");
> create_ftrstreams(debug, "ftr2_file", config.ftr2_file,
> config.ftr2_format, config.ftr2_width,
> ftr2_norm_fp,
> ftr2_ftr_start, ftr2_ftr_count,
> train_sent_start, train_sent_count,
> train_sent_range,
> cv_sent_start, cv_sent_count,
> cv_sent_range,
> window_extent,
> ftr2_window_offset, ftr2_window_len,
> config.ftr2_delta_order, config.ftr2_delta_win,
> config.ftr2_norm_mode,
> config.ftr2_norm_am, config.ftr2_norm_av,
> train_cache_frames, train_cache_seed,
> &ftr2_train_str, &ftr2_cv_str);
> }
>
> // Merge the two training feature streams.
> QN_InFtrStream* ftrfile_train_str;
> QN_InFtrStream* ftrfile_cv_str;
> if (ftr2_train_str!=NULL)
> {
> assert(ftr2_cv_str!=NULL);
> ftrfile_train_str = new QN_InFtrStream_JoinFtrs(debug,
> "train_ftrfile",
> *ftr1_train_str,
> *ftr2_train_str);
> ftrfile_cv_str = new QN_InFtrStream_JoinFtrs(debug, "cv_ftrfile",
> *ftr1_cv_str,
> *ftr2_cv_str);
> }
> else
> {
> assert(ftr2_cv_str==NULL);
> assert(ftr2_train_str==NULL);
> ftrfile_train_str = ftr1_train_str;
> ftrfile_cv_str = ftr1_cv_str;
> }
>
> // If necessary, add the unary input feature.
> if (unary_fp!=NULL)
> {
> assert(!hasConf); // confs not implemented for this
> QN_InLabStream* unary_train_str = NULL;
> QN_InLabStream* unary_cv_str = NULL;
>
> create_labstreams(debug, "unary", unary_fp,
> "pfile", 0,
> train_sent_start, train_sent_count,
> train_sent_range,
> cv_sent_start, cv_sent_count,
> cv_sent_range,
> window_extent,
> unary_window_offset,
> train_cache_frames, train_cache_seed,
> &unary_train_str, &unary_cv_str);
>
> // Convert the unary input label into a feature stream.
> QN_InFtrStream* unaryftr_train_str = NULL;
> QN_InFtrStream* unaryftr_cv_str = NULL;
>
> unaryftr_train_str = new QN_InFtrStream_OneHot(debug,
> "train_unaryfile",
> *unary_train_str,
> unary_size);
> unaryftr_cv_str = new QN_InFtrStream_OneHot(debug,
> "cv_unaryfile",
> *unary_cv_str,
> unary_size);
>
> // Merge in the feature streams.
> ftrfile_train_str = new QN_InFtrStream_JoinFtrs(debug,
> "train_unaryfile",
> *ftrfile_train_str,
> *unaryftr_train_str);
> ftrfile_cv_str = new QN_InFtrStream_JoinFtrs(debug, "cv_unaryfile",
> *ftrfile_cv_str,
> *unaryftr_cv_str);
>
> }
>
>
> QN_InLabStream* hardtarget_train_str = NULL;
> QN_InLabStream* hardtarget_cv_str = NULL;
> QN_InFtrStream* softtarget_train_str = NULL;
> QN_InFtrStream* softtarget_cv_str = NULL;
>
> // Does config.ftr1_file refer to just a single file?
> int ftr1_onefile = 1;
> if (strchr(config.ftr1_file, ',') != NULL) {
> // filename looks like a comma-separated list
> ftr1_onefile = 0;
> // won't try to run pathcmp on it.
> }
>
> if (hardtarget_fp!=NULL)
> {
> // Do hardtarget stream creation.
>
> // Handle formats where we need to know the number of ftrs to
> // extract the labels.
> // A bit of a hack!!
> size_t hardtarget_width;
> if (ftr1_onefile && QN_pathcmp(config.ftr1_file, hardtarget_file)==0)
> hardtarget_width = config.ftr1_width;
> else
> hardtarget_width = 0;
> char* hardtarget_format = config.hardtarget_format;
> if (strcmp(hardtarget_format, "")==0)
> hardtarget_format = config.ftr1_format;
>
> create_labstreams(debug, "hardtarget", hardtarget_fp,
> hardtarget_format, hardtarget_width,
> train_sent_start, train_sent_count,
> train_sent_range,
> cv_sent_start, cv_sent_count,
> cv_sent_range,
> window_extent,
> hardtarget_window_offset,
> train_cache_frames, train_cache_seed,
> &hardtarget_train_str, &hardtarget_cv_str);
> }
> else if (strcmp(softtarget_file,"")!=0)
> {
> assert(!hasConf); // confs not implemented for this
> size_t softtarget_width = config.softtarget_width;
> char* softtarget_format = config.softtarget_format;
> if (strcmp(softtarget_format, "")==0)
> softtarget_format = config.ftr1_format;
>
> create_ftrstreams(debug, "softtarget", (char *)softtarget_file,
> softtarget_format, softtarget_width,
> NULL,
> 0, 0,
> train_sent_start, train_sent_count,
> train_sent_range,
> cv_sent_start, cv_sent_count,
> cv_sent_range,
> window_extent,
> softtarget_window_offset, 1,
> 0, 0, 0, /* no deltas or per-utt normalization */
> 0.0, 0.0,
> train_cache_frames, train_cache_seed,
> &softtarget_train_str, &softtarget_cv_str);
>
> }
> else
> assert(0);
>
>
> // Create the MLP.
> QN_MLP* mlp;
> create_mlp(debug, "mlp",
> mlp3_input_size,mlp3_hidden_size,
> mlp3_output_size,config.mlp3_output_type,
> config.mlp3_bunch_size, config.threads,hasConf,
> &mlp);
>
> // Create the leaning rate schedule.
> QN_RateSchedule* lr_schedule;
> create_learnrate_schedule(debug, "learnrate",
> config.learnrate_schedule,
> config.learnrate_vals.vals,
> config.learnrate_vals.count,
> config.learnrate_scale,
> config.learnrate_epochs,
> &lr_schedule);
>
>
> // A weight file of "" means randomize.
> if (init_weight_fp==NULL)
> {
> if (verbose>0)
> {
> QN_OUTPUT("Randomizing weights...");
> }
> if (config.init_random_weight_min.count<1 ||
> config.init_random_weight_min.count>2 ||
> config.init_random_weight_max.count<1 ||
> config.init_random_weight_max.count>2 ||
> config.init_random_bias_min.count<1 ||
> config.init_random_bias_min.count>2 ||
> config.init_random_bias_max.count<1 ||
> config.init_random_bias_max.count>2) {
> QN_ERROR(NULL,"weight/bias list initializations must either have
> 1 or 2 elements");
> }
> float in2hid_min = config.init_random_weight_min.vals[0];
> float in2hid_max = config.init_random_weight_max.vals[0];
> float hidbias_min = config.init_random_bias_min.vals[0];
> float hidbias_max = config.init_random_bias_max.vals[0];
> /* if initialization lists have 1 member, use for both layer 1 and 2
> if 2 members, use separate initializations */
> float hid2out_min =
> config
> .init_random_weight_min
> .vals[(config.init_random_weight_min.count==1)?0:1];
> float hid2out_max =
> config
> .init_random_weight_max
> .vals[(config.init_random_weight_max.count==1)?0:1];
> float outbias_min =
> config
> .init_random_bias_min.vals[(config.init_random_bias_min.count==1)?
> 0:1];
> float outbias_max =
> config
> .init_random_bias_max.vals[(config.init_random_bias_max.count==1)?
> 0:1];
>
> QN_randomize_weights(debug, init_random_seed, *mlp,
> in2hid_min, in2hid_max,
> hidbias_min, hidbias_max,
> hid2out_min, hid2out_max,
> outbias_min, outbias_max);
> if (verbose>0)
> {
> QN_OUTPUT("Randomized weights.");
> }
> }
> else
> {
> float min, max;
> if (verbose>0)
> {
> QN_OUTPUT("Loading weights...");
> }
> QN_MLPWeightFile_RAP3 inwfile(debug, init_weight_fp,
> QN_READ,
> init_weight_file,
> mlp3_input_size, mlp3_hidden_size,
> mlp3_output_size);
> QN_read_weights(inwfile, *mlp, &min, &max, debug);
> QN_OUTPUT("Weights loaded from file, min=%g max=%g.",
> min, max);
> }
>
> const char* log_weight_file = config.log_weight_file;
> size_t train_chunk_size; // The number of presentations read
> // at one time.
> size_t mlp3_bunch_size = config.mlp3_bunch_size;
> if (mlp3_bunch_size>1)
> {
> train_chunk_size = mlp3_bunch_size;
> }
> else
> train_chunk_size = 16; // By default, use a size of 16.
> if (hardtarget_train_str!=NULL)
> {
> assert(hardtarget_cv_str!=NULL);
> QN_HardSentTrainer* trainer =
> new QN_HardSentTrainer(debug, // Debugging level.
> "trainer", // Debugging tag.
> verbose, // Verbosity level.
> mlp, // MLP.
> ftrfile_train_str, // Training ftr strm.
> hardtarget_train_str, // Training label str.
> ftrfile_cv_str, // CV feature stream.
> hardtarget_cv_str, // CV label stream.
> ftrfile_conf_train_str, // Train
> conf ftr strm.
> ftrfile_conf_cv_str, // CV conf
> ftr stream.
> lr_schedule, // Learning rate scheduler.
> 0.0, // Low target.
> 1.0, // High target.
> log_weight_file, // Where we log weights.
> train_chunk_size, // Batch size.
> lastlab_reject // Allow untrainable frames
> );
> trainer->train();
> delete trainer;
> }
> else
> {
> assert(softtarget_train_str!=NULL);
> assert(softtarget_cv_str!=NULL);
> assert(!hasConf); // confs not implemented for this
>
> QN_SoftSentTrainer* trainer =
> new QN_SoftSentTrainer(debug, // Debugging level.
> "trainer", // Debugging tag.
> verbose, // Verbosity level.
> mlp, // MLP.
> ftrfile_train_str, // Training ftr strm.
> softtarget_train_str, // Training label str.
> ftrfile_cv_str, // CV feature stream.
> softtarget_cv_str, // CV label stream.
> lr_schedule, // Learning rate scheduler.
> 0.0, // Low target.
> 1.0, // High target.
> log_weight_file, // Where we log weights.
> train_chunk_size // Batch size.
> );
> trainer->train();
> delete trainer;
> }
>
> if (verbose>0)
> {
> QN_OUTPUT("Starting to write weights...");
> }
> float min, max;
> QN_MLPWeightFile_RAP3 outwfile(debug, out_weight_fp, QN_WRITE,
> out_weight_file,
> mlp3_input_size, mlp3_hidden_size,
> mlp3_output_size);
> QN_write_weights(outwfile, *mlp, &min, &max, debug);
> QN_OUTPUT("Weights written to '%s'.", out_weight_file);
>
> // A note for the logfile.
> time(&now);
> QN_OUTPUT("Program stop: %.24s", ctime(&now));
> delete mlp;
>
> if (out_weight_fp!=NULL)
> QN_close(out_weight_fp);
> if (init_weight_fp!=NULL)
> QN_close(init_weight_fp);
> if (ftr2_norm_fp!=NULL)
> QN_close(ftr2_norm_fp);
> if (ftr1_norm_fp!=NULL)
> QN_close(ftr1_norm_fp);
> // if (softtarget_fp!=NULL)
> // {
> // QN_close(softtarget_fp);
> // delete softtarget_buf;
> // }
> if (hardtarget_fp!=NULL)
> {
> QN_close(hardtarget_fp);
> delete [] hardtarget_buf;
> }
> if (unary_fp!=NULL)
> {
> QN_close(unary_fp);
> delete unary_buf;
> }
> // if (ftr2_fp!=NULL)
> // {
> // QN_close(ftr2_fp);
> // delete ftr2_buf;
> // }
> // QN_close(ftr1_fp);
> // delete ftr1_buf;
> QN_close_ftrfiles();
> }
>
> int
> main(int argc, const char* argv[])
> {
> char* progname; // The name of the prog - set by QN_initargs.
>
> FILE* log_fp;
> char log_buf[160];
>
>
> set_defaults();
> QN_initargs(&argtab[0], &argc, &argv, &progname);
>
> // map norm_mode_str to val
> config.ftr1_norm_mode =
> QN_string_to_norm_const(config.ftr1_norm_mode_str);
> config.ftr2_norm_mode =
> QN_string_to_norm_const(config.ftr2_norm_mode_str);
>
> // Seed the random number generator.
> srand48(config.init_random_seed);
>
> log_fp = QN_open(config.log_file, "w");
> assert(setvbuf(log_fp, log_buf, _IOLBF, sizeof(log_buf))==0);
>
> QN_printargs(log_fp, progname, &argtab[0]);
> QN_logger = new QN_Logger_Simple(log_fp, stderr, progname);
>
> // Install our own out-of-memory handler if possible.
> #ifdef QN_HAVE_SET_NEW_HANDLER
> set_new_handler(QN_new_handler);
> #endif
>
> // Set the math mode
> qn_math = config.mlp3_pp ? QN_MATH_PP : QN_MATH_NV;
> #ifdef QN_HAVE_LIBBLAS
> qn_math |= config.mlp3_blas ? QN_MATH_BL : 0;
> #else
> if (config.mlp3_blas)
> {
> QN_ERROR(NULL, "cannot enable BLAS library as none is linked with
> the "
> "executable.");
> }
> #endif // #ifdef QN_HAVE_LIBBLAS
>
> qnstrn();
>
> exit(EXIT_SUCCESS);
> }
> _______________________________________________
> Emacs-orgmode mailing list
> Remember: use `Reply All' to send replies to the list.
> Emacs-orgmode@gnu.org
> http://lists.gnu.org/mailman/listinfo/emacs-orgmode
^ permalink raw reply [flat|nested] 7+ messages in thread
* Re: bug in org-store-link
2008-02-27 14:55 ` Carsten Dominik
@ 2008-02-27 16:20 ` Nick Dokos
2008-02-27 20:28 ` Scott Otterson
2008-02-27 19:05 ` Scott Otterson
1 sibling, 1 reply; 7+ messages in thread
From: Nick Dokos @ 2008-02-27 16:20 UTC (permalink / raw)
To: Carsten Dominik; +Cc: Scott Otterson, emacs-orgmode
Carsten Dominik <dominik@science.uva.nl> wrote:
> Hi Scott, this is not a small bug, but a problem that is really hard
> to solve.
> Supposed I used the exact line text to search, then you still have two
> lines in the buffer
> that would match.
>
> This is really about what strategy should be used to find a location
> in a file that has possibly changed.
> I have no good answer to that. Do you?
>
Two suggestions:
o Use a line number, instead of a search pattern and don't worry
about subsequent edits to the file that the link points to.
o Use the find-tag strategy: go to the line number as an initial
approximation. If the pattern is found there, done; if not, search
around that point for the pattern and keep expanding the area of the
search until found. I don't know if they still do it that way but I
think that's how it was done some time ago.
Nick
^ permalink raw reply [flat|nested] 7+ messages in thread
* Re: bug in org-store-link
2008-02-27 14:55 ` Carsten Dominik
2008-02-27 16:20 ` Nick Dokos
@ 2008-02-27 19:05 ` Scott Otterson
1 sibling, 0 replies; 7+ messages in thread
From: Scott Otterson @ 2008-02-27 19:05 UTC (permalink / raw)
To: Carsten Dominik; +Cc: emacs-orgmode
Yeah, I guess that instead of saying it was a small bug, I should have
said that it's a bug of small consequence (for most users, but matters
to me, least).
The ambiguity problem you mention could be solved by matching more than
one line. To keep the string stored in the org link short,
org-store-link could expand it to another line only when needed for a
unique match. Or, it could expand just enough _words_ to ensure
uniqueness, plus maybe one word on each end for some insurance against
future changes.
Future changes are the harder part. In speech recognition, there's an
analogous problem where there's a need to match a transcript to
recognized speech, which may have a lot of word errors, insertions and
deletions. The simplest solution commonly employed is a word-level
Levenshtein distance:
http://en.wikipedia.org/wiki/Levenshtein_distance
(this is for chars, but you get the idea)
Scott
Carsten Dominik wrote:
> Hi Scott, this is not a small bug, but a problem that is really hard
> to solve.
> Supposed I used the exact line text to search, then you still have two
> lines in the buffer
> that would match.
>
> This is really about what strategy should be used to find a location
> in a file that has possibly changed.
> I have no good answer to that. Do you?
>
> - Carsten
^ permalink raw reply [flat|nested] 7+ messages in thread
* Re: bug in org-store-link
2008-02-27 16:20 ` Nick Dokos
@ 2008-02-27 20:28 ` Scott Otterson
2008-02-27 20:57 ` Phil Jackson
0 siblings, 1 reply; 7+ messages in thread
From: Scott Otterson @ 2008-02-27 20:28 UTC (permalink / raw)
To: nicholas.dokos; +Cc: emacs-orgmode
[-- Attachment #1: Type: text/html, Size: 1684 bytes --]
[-- Attachment #2: Type: text/plain, Size: 204 bytes --]
_______________________________________________
Emacs-orgmode mailing list
Remember: use `Reply All' to send replies to the list.
Emacs-orgmode@gnu.org
http://lists.gnu.org/mailman/listinfo/emacs-orgmode
^ permalink raw reply [flat|nested] 7+ messages in thread
* Re: bug in org-store-link
2008-02-27 20:28 ` Scott Otterson
@ 2008-02-27 20:57 ` Phil Jackson
2008-02-27 23:05 ` Carsten Dominik
0 siblings, 1 reply; 7+ messages in thread
From: Phil Jackson @ 2008-02-27 20:57 UTC (permalink / raw)
To: Scott Otterson; +Cc: emacs-orgmode
Scott Otterson <scotto@u.washington.edu> writes:
[...]
> + expand matching pattern outwards until matching uniqueness is
> assured across the whole file
This isn't foolproof as an identical line could be added later leaving
us with much the same problem.
> + store line number
This actually is already possible, but not the default behaviour for
org-store-link.
Also, from the manual:
,----[ org.html#Handling-links ]
| If there is an active region, the selected words will form the basis of
| the search string.
`----
Cheers,
Phil
--
Phil Jackson
http://www.shellarchive.co.uk
^ permalink raw reply [flat|nested] 7+ messages in thread
* Re: bug in org-store-link
2008-02-27 20:57 ` Phil Jackson
@ 2008-02-27 23:05 ` Carsten Dominik
0 siblings, 0 replies; 7+ messages in thread
From: Carsten Dominik @ 2008-02-27 23:05 UTC (permalink / raw)
To: Phil Jackson; +Cc: Scott Otterson, emacs-orgmode
[-- Attachment #1.1: Type: text/plain, Size: 965 bytes --]
On Feb 27, 2008, at 9:57 PM, Phil Jackson wrote:
> Scott Otterson <scotto@u.washington.edu> writes:
>
> [...]
>
>> + expand matching pattern outwards until matching uniqueness is
>> assured across the whole file
>
> This isn't foolproof as an identical line could be added later leaving
> us with much the same problem.
>
>> + store line number
>
> This actually is already possible, but not the default behaviour for
> org-store-link.
>
> Also, from the manual:
>
> ,----[ org.html#Handling-links ]
> | If there is an active region, the selected words will form the
> basis of
> | the search string.
> `----
Org-mode has an extension mechanism that can be used to built custom
searches
for file links. Maybe someone would enjoy writing an extension,
and if it works well we could consider making it more standard.
Take a look at http://orgmode.org/manual/Custom-searches.html#Custom-searches
and at the bibtex-related code in org.el.
Cheers
- Carsten
[-- Attachment #1.2: Type: text/html, Size: 2081 bytes --]
[-- Attachment #2: Type: text/plain, Size: 204 bytes --]
_______________________________________________
Emacs-orgmode mailing list
Remember: use `Reply All' to send replies to the list.
Emacs-orgmode@gnu.org
http://lists.gnu.org/mailman/listinfo/emacs-orgmode
^ permalink raw reply [flat|nested] 7+ messages in thread
end of thread, other threads:[~2008-02-27 23:05 UTC | newest]
Thread overview: 7+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2008-02-26 20:40 bug in org-store-link Scott Otterson
2008-02-27 14:55 ` Carsten Dominik
2008-02-27 16:20 ` Nick Dokos
2008-02-27 20:28 ` Scott Otterson
2008-02-27 20:57 ` Phil Jackson
2008-02-27 23:05 ` Carsten Dominik
2008-02-27 19:05 ` Scott Otterson
Code repositories for project(s) associated with this public inbox
https://git.savannah.gnu.org/cgit/emacs/org-mode.git
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).