Pikey simplifies building Gate applications. Pikey contains a custom application loader which uses a portable format.
The Pikey format follows the property list configuration syntax, with Mustache for variable substitution. For an elaborate example, download pikey-annie.zip. Example of a simple Gate application:
{ pipeline = ( { title = "reset"; class = "gate.creole.annotdelete.AnnotationDeletePR"; params = { setsToKeep = ("Key"); keepOriginalMarkupsAS = true; } }, { title = "tokenizer"; class = "gate.creole.tokeniser.DefaultTokeniser"; dependencies = ("gate.creole.tokeniser.SimpleTokeniser", "gate.creole.Transducer"); params = { encoding = "UTF-8"; tokeniserRulesURL = "{{basedir}}/tokeniser/DefaultTokeniser.rules"; transducerGrammarURL = "{{basedir}}/tokeniser/postprocess.jape"; } }, { title = "sentence splitter"; class = "gate.creole.splitter.SentenceSplitter"; params = { encoding = "UTF-8"; gazetteerListsURL = "{{basedir}}/sentenceSplitter/gazetteer/lists.def"; transducerURL = "{{basedir}}/sentenceSplitter/grammar/main.jape"; } }); }
In the above example, {{basedir}} refers to the folder where the Gate application file is located. At present, {{basedir}} is the only built-in variable. User-defined variables can be added and referenced in the same way:
{ vars = { keyset = "Key"; encoding = "UTF-8"; plugins = "/opt/gate-7.1/plugins" } pipeline = ( { title = "reset"; class = "gate.creole.annotdelete.AnnotationDeletePR"; params = { setsToKeep = ("{{keyset}}"); keepOriginalMarkupsAS = true; } }, { title = "tokenizer"; class = "gate.creole.tokeniser.DefaultTokeniser"; dependencies = ("gate.creole.tokeniser.SimpleTokeniser", "gate.creole.Transducer"); params = { encoding = "{{encoding}}"; tokeniserRulesURL = "{{plugins}}/tokeniser/DefaultTokeniser.rules"; transducerGrammarURL = "{{plugins}}/tokeniser/postprocess.jape"; } }); }
Processing resources which are not included in the Gate distribution need an explicit classpath entry to tell Pikey where to look for class files, as shown for the "langid" resource below:
{ pipeline = ( { title = "reset"; class = "gate.creole.annotdelete.AnnotationDeletePR"; params = { setsToKeep = ("Key"); keepOriginalMarkupsAS = true; } }, { title = "tokenizer"; class = "gate.creole.tokeniser.DefaultTokeniser"; dependencies = ("gate.creole.tokeniser.SimpleTokeniser", "gate.creole.Transducer"); params = { encoding = "UTF-8"; tokeniserRulesURL = "{{basedir}}/tokeniser/DefaultTokeniser.rules"; transducerGrammarURL = "{{basedir}}/tokeniser/postprocess.jape"; } }, { title = "langid"; class = "org.knallgrau.utils.textcat.LanguageIdentifier"; classpath = ("{{basedir}}/lib/language-identification.jar") params = { languageFeatureName = "lang"; configURL = "{{basedir}}/languageIdentification/default-iso.conf"; } }, ... ); }
In Gate, processing resources can run conditionally, depending on a document feature. The following example uses a condition entry to run a Gazetteer on English text only:
{ pipeline = ( { title = "reset"; class = "gate.creole.annotdelete.AnnotationDeletePR"; params = { setsToKeep = ("Key"); keepOriginalMarkupsAS = true; } }, { title = "tokenizer"; class = "gate.creole.tokeniser.DefaultTokeniser"; dependencies = ("gate.creole.tokeniser.SimpleTokeniser", "gate.creole.Transducer"); params = { encoding = "UTF-8"; tokeniserRulesURL = "{{basedir}}/tokeniser/DefaultTokeniser.rules"; transducerGrammarURL = "{{basedir}}/tokeniser/postprocess.jape"; } }, { title = "langid"; class = "org.knallgrau.utils.textcat.LanguageIdentifier"; classpath = ("{{basedir}}/lib/language-identification.jar") params = { languageFeatureName = "lang"; configURL = "{{basedir}}/languageIdentification/default-iso.conf"; } }, { title = "gazetteer/en"; class = "gate.creole.gazetteer.DefaultGazetteer"; condition = { key = "lang"; value = "en"; } params = { wholeWordsOnly = "true"; longestMatchOnly = "false"; gazetteerFeatureSeparator = "\t"; listsURL = "{{basedir}}/gazetteer/en/lists.def"; caseSensitive = "false"; encoding = "UTF-8"; } }); }
Gate can handle the vast majority of parameters out of the box, including string- and URL-typed parameters. However, some processing resources accept custom types such as enums, and these are not always detected by Gate. In that case, you have to tell Gate explicitly what type the parameter has by specifying both its type and its value. The example below shows how a parameter of the enum type gate.learning.RunMode is passed to the machine learning PR:
{ pipeline = ( { title = "reset"; class = "gate.creole.annotdelete.AnnotationDeletePR"; params = { setsToKeep = ("Key"); keepOriginalMarkupsAS = true; } }, { title = "tokenizer"; class = "gate.creole.tokeniser.DefaultTokeniser"; dependencies = ("gate.creole.tokeniser.SimpleTokeniser", "gate.creole.Transducer"); params = { encoding = "UTF-8"; tokeniserRulesURL = "{{basedir}}/tokeniser/DefaultTokeniser.rules"; transducerGrammarURL = "{{basedir}}/tokeniser/postprocess.jape"; } }, { title = "learning"; class = "gate.learning.LearningAPIMain"; params = { configFileURL = "{{basedir}}/learning/config.xml"; learningMode = { type = "gate.learning.RunMode"; value = "APPLICATION"; } } }); }