|
||||||||||
PREV NEXT | FRAMES NO FRAMES |
ca.uottawa.* |
---|
ca.uottawa.balie.Balie | ||
---|---|---|
public static final boolean | ALLOW_FUZZY_MATCH | true |
public static final boolean | DEBUG_ABBREVIATION_LOOKUP | false |
public static final boolean | DEBUG_LANGUAGE_IDENTIFICATION | false |
public static final boolean | DEBUG_LIGATURE | false |
public static final boolean | DEBUG_NAMED_ENTITY_RECOGNITION | false |
public static final boolean | DEBUG_NAMED_ENTITY_RECOGNITION_FULL | false |
public static final boolean | DEBUG_PRINT_SBD_TEST_CORPUS | false |
public static final boolean | DEBUG_PUNCT_LOOKUP | false |
public static final boolean | DEBUG_TOKEN | false |
public static final boolean | DEBUG_TOKENIZER | false |
public static final boolean | DEBUG_UNBREAKABLE_LOOKUP | false |
public static final java.lang.String | ENCODING_DEFAULT | "default" |
public static final java.lang.String | ENCODING_LITTLE_INDIAN | "UnicodeLittle" |
public static final java.lang.String | ENCODING_UTF8 | "utf8" |
public static final java.lang.String | ENGLISH_TOKEN_LIST_ON_DISK | "TokenListEng.sig" |
public static final java.lang.String | FRENCH_TOKEN_LIST_ON_DISK | "TokenListFre.sig" |
public static final java.lang.String | GERMAN_TOKEN_LIST_ON_DISK | "TokenListGer.sig" |
public static final java.lang.String | ITALIAN_TOKEN_LIST_ON_DISK | "TokenListIta.sig" |
public static final java.lang.String | LANGUAGE_ENGLISH | "English" |
public static final java.lang.String | LANGUAGE_FRENCH | "French" |
public static final java.lang.String | LANGUAGE_GERMAN | "German" |
public static final java.lang.String | LANGUAGE_ID_MODEL | "LanguageIdentificationModel.sig" |
public static final java.lang.String | LANGUAGE_ID_TESTING_CORPUS | "../BaLIECorpora/Corpus - Language Identification/Test" |
public static final java.lang.String | LANGUAGE_ID_TRAINING_CORPUS | "../BaLIECorpora/Corpus - Language Identification/Train" |
public static final java.lang.String | LANGUAGE_ITALIAN | "Italian" |
public static final java.lang.String | LANGUAGE_ROMANIAN | "Romanian" |
public static final java.lang.String | LANGUAGE_SPANISH | "Spanish" |
public static final java.lang.String | LANGUAGE_UNKNOWN | "Unknown" |
public static final int | MAX_NUMBER_FUZZY_VARIANTS | 5 |
public static final int | MAX_TOKEN_PER_ENTITY | 5 |
public static final int | MIN_SIZE_FOR_FUZZY_MATCH | 8 |
public static final java.lang.String | OUT_LI_TEST_MODEL | "../BaLIECorpora/Arff/LangIDTestModel.arff" |
public static final java.lang.String | OUT_LI_TRAIN_MODEL | "../BaLIECorpora/Arff/LangIDTrainModel.arff" |
public static final java.lang.String | OUT_SBD_TEST_MODEL | "../BaLIECorpora/Arff/SBDTestModel.arff" |
public static final java.lang.String | OUT_SBD_TRAIN_MODEL | "../BaLIECorpora/Arff/SBDTrainModel.arff" |
public static final java.lang.String | ROMANIAN_TOKEN_LIST_ON_DISK | "TokenListRom.sig" |
public static final java.lang.String | SBR_MODEL | "SentenceBoundariesRecognition.sig" |
public static final java.lang.String | SBR_TESTING_CORPUS_PC | "../BaLIECorpora/Corpus - Sentence Boundaries/test.xml" |
public static final java.lang.String | SBR_TRAINING_CORPUS_PC | "../BaLIECorpora/Corpus - Sentence Boundaries/train.xml" |
public static final java.lang.String | SPANISH_TOKEN_LIST_ON_DISK | "TokenListSpa.sig" |
public static final java.lang.String | UNBREAK_TOKEN_LIST_ON_DISK | "TokenListUnb.sig" |
ca.uottawa.balie.Canonizer | ||
---|---|---|
public static final int | RULE_EXPAND_LIGATURES | 8 |
public static final int | RULE_LOWERCASE | 1 |
public static final int | RULE_NORMALIZE_PUNCT | 2 |
public static final int | RULE_REMOVE_INTERNAL_PUNCT | 4 |
public static final int | RULE_STRIP_ACCENTS | 16 |
ca.uottawa.balie.HashCodeUtil | ||
---|---|---|
public static final int | SEED | 23 |
ca.uottawa.balie.TokenConsts | ||
---|---|---|
public static final int | POS_ADJECTIVE | 4 |
public static final int | POS_ADVERB | 16 |
public static final int | POS_CONJUNCTION | 32 |
public static final int | POS_DETERMINER | 1 |
public static final int | POS_NOUN | 2 |
public static final int | POS_NUMBER | 536870912 |
public static final int | POS_NUMBER_LIKE | 1073741824 |
public static final int | POS_PARTICLE | 128 |
public static final int | POS_POSSESSIVE | 512 |
public static final int | POS_PREPOSITION | 64 |
public static final int | POS_PRONOUN | 256 |
public static final int | POS_PROPER_NAME_LIKE | 268435456 |
public static final int | POS_UNKNOWN | -2147483648 |
public static final int | POS_VERB | 8 |
public static final int | PUNCT_AMPERSAND | 1048576 |
public static final int | PUNCT_APOSTROPHE | 1024 |
public static final int | PUNCT_BACK_SLASH | 8192 |
public static final int | PUNCT_CLOSE_BRACKET | 131072 |
public static final int | PUNCT_CLOSE_PARENTHESIS | 128 |
public static final int | PUNCT_COLON | 32 |
public static final int | PUNCT_COMMA | 2 |
public static final int | PUNCT_COMMERCIAL_AT | 512 |
public static final int | PUNCT_COPYRIGHT | 33554432 |
public static final int | PUNCT_CURRENCY | 16777216 |
public static final int | PUNCT_DASH | 2048 |
public static final int | PUNCT_EXCLAMATION | 4 |
public static final int | PUNCT_INTERROGATION | 8 |
public static final int | PUNCT_INV_EXCLAMATION | 262144 |
public static final int | PUNCT_INV_INTERROGATION | 524288 |
public static final int | PUNCT_LINEFEED | 536870912 |
public static final int | PUNCT_MISC_ARITHMETIC | 16384 |
public static final int | PUNCT_NEWLINE | 1073741824 |
public static final int | PUNCT_OPEN_BRACKET | 65536 |
public static final int | PUNCT_OPEN_PARENTHESIS | 64 |
public static final int | PUNCT_PERCENT | 32768 |
public static final int | PUNCT_PERIOD | 1 |
public static final int | PUNCT_PIPE | 8388608 |
public static final int | PUNCT_QUOTE | 256 |
public static final int | PUNCT_SEMI_COLON | 16 |
public static final int | PUNCT_SLASH | 4096 |
public static final int | PUNCT_TABULATION | 268435456 |
public static final int | PUNCT_TILDE | 4194304 |
public static final int | PUNCT_UNDERSCORE | 2097152 |
public static final int | PUNCT_UNKNOWN | -2147483648 |
public static final int | TYPE_PUNCTUATION | 1 |
public static final int | TYPE_WORD | 2 |
ca.uottawa.balie.TokenFeature | ||
---|---|---|
public static final double | UNKNOWN_NUMERIC_VALUE | -1.0 |
ca.uottawa.balie.WekaAttributeSelection | ||
---|---|---|
public static final int | DEFAULT_NUM_ATTRIBUTES | 100 |
public static final int | WEKA_CHI_SQUARE | 0 |
public static final int | WEKA_INFO_GAIN | 1 |
public static final int | WEKA_ONER | 6 |
public static final int | WEKA_RELIEF | 5 |
public static final int | WEKA_SVM | 4 |
public static final int | WEKA_SYM_UNCERT | 3 |
public static final int | WEKA_WRAPPER | 2 |
ca.uottawa.balie.WekaPersistance | ||
---|---|---|
public static final int | PRINT_TESTING_SET | 1 |
public static final int | PRINT_TRAINING_SET | 0 |
|
||||||||||
PREV NEXT | FRAMES NO FRAMES |