/**
 * elasticlunr - http://weixsong.github.io
 * Lightweight full-text search engine in Javascript for browser search and offline search. - 0.9.5
 *
 * Copyright (C) 2017 Oliver Nightingale
 * Copyright (C) 2017 Wei Song
 * MIT Licensed
 * @license
 */

(function(){

/*!
 * elasticlunr.js
 * Copyright (C) 2017 Oliver Nightingale
 * Copyright (C) 2017 Wei Song
 */

/**
 * Convenience function for instantiating a new elasticlunr index and configuring it
 * with the default pipeline functions and the passed config function.
 *
 * When using this convenience function a new index will be created with the
 * following functions already in the pipeline:
 *
 * 1. elasticlunr.trimmer - trims non-word characters
 * 2. elasticlunr.StopWordFilter - filters out any stop words before they enter the
 * index
 * 3. elasticlunr.stemmer - stems the tokens before entering the index.
 *
 *
 * Example:
 *
 *     var idx = elasticlunr(function () {
 *       this.addField('id');
 *       this.addField('title');
 *       this.addField('body');
 *
 *       //this.setRef('id'); // default ref is 'id'
 *
 *       this.pipeline.add(function () {
 *         // some custom pipeline function
 *       });
 *     });
 *
 *     idx.addDoc({
 *       id: 1,
 *       title: 'Oracle released database 12g',
 *       body: 'Yesterday, Oracle has released their latest database, named 12g, more robust. this product will increase Oracle profit.'
 *     });
 *
 *     idx.addDoc({
 *       id: 2,
 *       title: 'Oracle released annual profit report',
 *       body: 'Yesterday, Oracle has released their annual profit report of 2015, total profit is 12.5 Billion.'
 *     });
 *
 *     # simple search
 *     idx.search('oracle database');
 *
 *     # search with query-time boosting
 *     idx.search('oracle database', {fields: {title: {boost: 2}, body: {boost: 1}}});
 *
 * @param {Function} config A function that will be called with the new instance
 * of the elasticlunr.Index as both its context and first parameter. It can be used to
 * customize the instance of new elasticlunr.Index.
* @namespace * @module * @return {elasticlunr.Index} * */ var elasticlunr = function (config) { var idx = new elasticlunr.Index; idx.pipeline.add( elasticlunr.trimmer, elasticlunr.stopWordFilter, elasticlunr.stemmer ); if (config) config.call(idx, idx); return idx; }; elasticlunr.version = "0.9.5"; // only used this to make elasticlunr.js compatible with lunr-languages // this is a trick to define a global alias of elasticlunr lunr = elasticlunr; /*! * elasticlunr.utils * Copyright (C) 2017 Oliver Nightingale * Copyright (C) 2017 Wei Song */ /** * A namespace containing utils for the rest of the elasticlunr library */ elasticlunr.utils = {}; /** * Print a warning message to the console. * * @param {String} message The message to be printed. * @memberOf Utils */ elasticlunr.utils.warn = (function (global) { return function (message) { if (global.console && console.warn) { console.warn(message); } }; })(this); /** * Convert an object to string. * * In the case of `null` and `undefined` the function returns * an empty string, in all other cases the result of calling * `toString` on the passed object is returned. * * @param {object} obj The object to convert to a string. * @return {String} string representation of the passed object. * @memberOf Utils */ elasticlunr.utils.toString = function (obj) { if (obj === void 0 || obj === null) { return ""; } return obj.toString(); }; /*! * elasticlunr.EventEmitter * Copyright (C) 2017 Oliver Nightingale * Copyright (C) 2017 Wei Song */ /** * elasticlunr.EventEmitter is an event emitter for elasticlunr. * It manages adding and removing event handlers and triggering events and their handlers. * * Each event could has multiple corresponding functions, * these functions will be called as the sequence that they are added into the event. * * @constructor */ elasticlunr.EventEmitter = function () { this.events = {}; }; /** * Binds a handler function to a specific event(s). 
* * Can bind a single function to many different events in one call. * * @param {String} [eventName] The name(s) of events to bind this function to. * @param {Function} fn The function to call when an event is fired. * @memberOf EventEmitter */ elasticlunr.EventEmitter.prototype.addListener = function () { var args = Array.prototype.slice.call(arguments), fn = args.pop(), names = args; if (typeof fn !== "function") throw new TypeError ("last argument must be a function"); names.forEach(function (name) { if (!this.hasHandler(name)) this.events[name] = []; this.events[name].push(fn); }, this); }; /** * Removes a handler function from a specific event. * * @param {String} eventName The name of the event to remove this function from. * @param {Function} fn The function to remove from an event. * @memberOf EventEmitter */ elasticlunr.EventEmitter.prototype.removeListener = function (name, fn) { if (!this.hasHandler(name)) return; var fnIndex = this.events[name].indexOf(fn); if (fnIndex === -1) return; this.events[name].splice(fnIndex, 1); if (this.events[name].length == 0) delete this.events[name]; }; /** * Call all functions that bounded to the given event. * * Additional data can be passed to the event handler as arguments to `emit` * after the event name. * * @param {String} eventName The name of the event to emit. * @memberOf EventEmitter */ elasticlunr.EventEmitter.prototype.emit = function (name) { if (!this.hasHandler(name)) return; var args = Array.prototype.slice.call(arguments, 1); this.events[name].forEach(function (fn) { fn.apply(undefined, args); }, this); }; /** * Checks whether a handler has ever been stored against an event. * * @param {String} eventName The name of the event to check. * @private * @memberOf EventEmitter */ elasticlunr.EventEmitter.prototype.hasHandler = function (name) { return name in this.events; }; /*! 
* elasticlunr.tokenizer * Copyright (C) 2017 Oliver Nightingale * Copyright (C) 2017 Wei Song */ /** * A function for splitting a string into tokens. * Currently English is supported as default. * Uses `elasticlunr.tokenizer.seperator` to split strings, you could change * the value of this property to set how you want strings are split into tokens. * IMPORTANT: use elasticlunr.tokenizer.seperator carefully, if you are not familiar with * text process, then you'd better not change it. * * @module * @param {String} str The string that you want to tokenize. * @see elasticlunr.tokenizer.seperator * @return {Array} */ elasticlunr.tokenizer = function (str) { if (!arguments.length || str === null || str === undefined) return []; if (Array.isArray(str)) { var arr = str.filter(function(token) { if (token === null || token === undefined) { return false; } return true; }); arr = arr.map(function (t) { return elasticlunr.utils.toString(t).toLowerCase(); }); var out = []; arr.forEach(function(item) { var tokens = item.split(elasticlunr.tokenizer.seperator); out = out.concat(tokens); }, this); return out; } return str.toString().trim().toLowerCase().split(elasticlunr.tokenizer.seperator); }; /** * Default string seperator. */ elasticlunr.tokenizer.defaultSeperator = /[\s\-]+/; /** * The sperator used to split a string into tokens. Override this property to change the behaviour of * `elasticlunr.tokenizer` behaviour when tokenizing strings. By default this splits on whitespace and hyphens. * * @static * @see elasticlunr.tokenizer */ elasticlunr.tokenizer.seperator = elasticlunr.tokenizer.defaultSeperator; /** * Set up customized string seperator * * @param {Object} sep The customized seperator that you want to use to tokenize a string. 
*/ elasticlunr.tokenizer.setSeperator = function(sep) { if (sep !== null && sep !== undefined && typeof(sep) === 'object') { elasticlunr.tokenizer.seperator = sep; } } /** * Reset string seperator * */ elasticlunr.tokenizer.resetSeperator = function() { elasticlunr.tokenizer.seperator = elasticlunr.tokenizer.defaultSeperator; } /** * Get string seperator * */ elasticlunr.tokenizer.getSeperator = function() { return elasticlunr.tokenizer.seperator; } /*! * elasticlunr.Pipeline * Copyright (C) 2017 Oliver Nightingale * Copyright (C) 2017 Wei Song */ /** * elasticlunr.Pipelines maintain an ordered list of functions to be applied to * both documents tokens and query tokens. * * An instance of elasticlunr.Index will contain a pipeline * with a trimmer, a stop word filter, an English stemmer. Extra * functions can be added before or after either of these functions or these * default functions can be removed. * * When run the pipeline, it will call each function in turn. * * The output of the functions in the pipeline will be passed to the next function * in the pipeline. To exclude a token from entering the index the function * should return undefined, the rest of the pipeline will not be called with * this token. * * For serialisation of pipelines to work, all functions used in an instance of * a pipeline should be registered with elasticlunr.Pipeline. Registered functions can * then be loaded. If trying to load a serialised pipeline that uses functions * that are not registered an error will be thrown. * * If not planning on serialising the pipeline then registering pipeline functions * is not necessary. * * @constructor */ elasticlunr.Pipeline = function () { this._queue = []; }; elasticlunr.Pipeline.registeredFunctions = {}; /** * Register a function in the pipeline. * * Functions that are used in the pipeline should be registered if the pipeline * needs to be serialised, or a serialised pipeline needs to be loaded. 
* * Registering a function does not add it to a pipeline, functions must still be * added to instances of the pipeline for them to be used when running a pipeline. * * @param {Function} fn The function to register. * @param {String} label The label to register this function with * @memberOf Pipeline */ elasticlunr.Pipeline.registerFunction = function (fn, label) { if (label in elasticlunr.Pipeline.registeredFunctions) { elasticlunr.utils.warn('Overwriting existing registered function: ' + label); } fn.label = label; elasticlunr.Pipeline.registeredFunctions[label] = fn; }; /** * Get a registered function in the pipeline. * * @param {String} label The label of registered function. * @return {Function} * @memberOf Pipeline */ elasticlunr.Pipeline.getRegisteredFunction = function (label) { if ((label in elasticlunr.Pipeline.registeredFunctions) !== true) { return null; } return elasticlunr.Pipeline.registeredFunctions[label]; }; /** * Warns if the function is not registered as a Pipeline function. * * @param {Function} fn The function to check for. * @private * @memberOf Pipeline */ elasticlunr.Pipeline.warnIfFunctionNotRegistered = function (fn) { var isRegistered = fn.label && (fn.label in this.registeredFunctions); if (!isRegistered) { elasticlunr.utils.warn('Function is not registered with pipeline. This may cause problems when serialising the index.\n', fn); } }; /** * Loads a previously serialised pipeline. * * All functions to be loaded must already be registered with elasticlunr.Pipeline. * If any function from the serialised data has not been registered then an * error will be thrown. * * @param {Object} serialised The serialised pipeline to load. 
* @return {elasticlunr.Pipeline} * @memberOf Pipeline */ elasticlunr.Pipeline.load = function (serialised) { var pipeline = new elasticlunr.Pipeline; serialised.forEach(function (fnName) { var fn = elasticlunr.Pipeline.getRegisteredFunction(fnName); if (fn) { pipeline.add(fn); } else { throw new Error('Cannot load un-registered function: ' + fnName); } }); return pipeline; }; /** * Adds new functions to the end of the pipeline. * * Logs a warning if the function has not been registered. * * @param {Function} functions Any number of functions to add to the pipeline. * @memberOf Pipeline */ elasticlunr.Pipeline.prototype.add = function () { var fns = Array.prototype.slice.call(arguments); fns.forEach(function (fn) { elasticlunr.Pipeline.warnIfFunctionNotRegistered(fn); this._queue.push(fn); }, this); }; /** * Adds a single function after a function that already exists in the * pipeline. * * Logs a warning if the function has not been registered. * If existingFn is not found, throw an Exception. * * @param {Function} existingFn A function that already exists in the pipeline. * @param {Function} newFn The new function to add to the pipeline. * @memberOf Pipeline */ elasticlunr.Pipeline.prototype.after = function (existingFn, newFn) { elasticlunr.Pipeline.warnIfFunctionNotRegistered(newFn); var pos = this._queue.indexOf(existingFn); if (pos === -1) { throw new Error('Cannot find existingFn'); } this._queue.splice(pos + 1, 0, newFn); }; /** * Adds a single function before a function that already exists in the * pipeline. * * Logs a warning if the function has not been registered. * If existingFn is not found, throw an Exception. * * @param {Function} existingFn A function that already exists in the pipeline. * @param {Function} newFn The new function to add to the pipeline. 
* @memberOf Pipeline */ elasticlunr.Pipeline.prototype.before = function (existingFn, newFn) { elasticlunr.Pipeline.warnIfFunctionNotRegistered(newFn); var pos = this._queue.indexOf(existingFn); if (pos === -1) { throw new Error('Cannot find existingFn'); } this._queue.splice(pos, 0, newFn); }; /** * Removes a function from the pipeline. * * @param {Function} fn The function to remove from the pipeline. * @memberOf Pipeline */ elasticlunr.Pipeline.prototype.remove = function (fn) { var pos = this._queue.indexOf(fn); if (pos === -1) { return; } this._queue.splice(pos, 1); }; /** * Runs the current list of functions that registered in the pipeline against the * input tokens. * * @param {Array} tokens The tokens to run through the pipeline. * @return {Array} * @memberOf Pipeline */ elasticlunr.Pipeline.prototype.run = function (tokens) { var out = [], tokenLength = tokens.length, pipelineLength = this._queue.length; for (var i = 0; i < tokenLength; i++) { var token = tokens[i]; for (var j = 0; j < pipelineLength; j++) { token = this._queue[j](token, i, tokens); if (token === void 0 || token === null) break; }; if (token !== void 0 && token !== null) out.push(token); }; return out; }; /** * Resets the pipeline by removing any existing processors. * * @memberOf Pipeline */ elasticlunr.Pipeline.prototype.reset = function () { this._queue = []; }; /** * Get the pipeline if user want to check the pipeline. * * @memberOf Pipeline */ elasticlunr.Pipeline.prototype.get = function () { return this._queue; }; /** * Returns a representation of the pipeline ready for serialisation. * Only serialize pipeline function's name. Not storing function, so when * loading the archived JSON index file, corresponding pipeline function is * added by registered function of elasticlunr.Pipeline.registeredFunctions * * Logs a warning if the function has not been registered. 
* * @return {Array} * @memberOf Pipeline */ elasticlunr.Pipeline.prototype.toJSON = function () { return this._queue.map(function (fn) { elasticlunr.Pipeline.warnIfFunctionNotRegistered(fn); return fn.label; }); }; /*! * elasticlunr.Index * Copyright (C) 2017 Oliver Nightingale * Copyright (C) 2017 Wei Song */ /** * elasticlunr.Index is object that manages a search index. It contains the indexes * and stores all the tokens and document lookups. It also provides the main * user facing API for the library. * * @constructor */ elasticlunr.Index = function () { this._fields = []; this._ref = 'id'; this.pipeline = new elasticlunr.Pipeline; this.documentStore = new elasticlunr.DocumentStore; this.index = {}; this.eventEmitter = new elasticlunr.EventEmitter; this._idfCache = {}; this.on('add', 'remove', 'update', (function () { this._idfCache = {}; }).bind(this)); }; /** * Bind a handler to events being emitted by the index. * * The handler can be bound to many events at the same time. * * @param {String} [eventName] The name(s) of events to bind the function to. * @param {Function} fn The serialised set to load. * @memberOf Index */ elasticlunr.Index.prototype.on = function () { var args = Array.prototype.slice.call(arguments); return this.eventEmitter.addListener.apply(this.eventEmitter, args); }; /** * Removes a handler from an event being emitted by the index. * * @param {String} eventName The name of events to remove the function from. * @param {Function} fn The serialised set to load. * @memberOf Index */ elasticlunr.Index.prototype.off = function (name, fn) { return this.eventEmitter.removeListener(name, fn); }; /** * Loads a previously serialised index. * * Issues a warning if the index being imported was serialised * by a different version of elasticlunr. * * @param {Object} serialisedData The serialised set to load. 
* @return {elasticlunr.Index} * @memberOf Index */ elasticlunr.Index.load = function (serialisedData) { if (serialisedData.version !== elasticlunr.version) { elasticlunr.utils.warn('version mismatch: current ' + elasticlunr.version + ' importing ' + serialisedData.version); } var idx = new this; idx._fields = serialisedData.fields; idx._ref = serialisedData.ref; idx.documentStore = elasticlunr.DocumentStore.load(serialisedData.documentStore); idx.pipeline = elasticlunr.Pipeline.load(serialisedData.pipeline); idx.index = {}; for (var field in serialisedData.index) { idx.index[field] = elasticlunr.InvertedIndex.load(serialisedData.index[field]); } return idx; }; /** * Adds a field to the list of fields that will be searchable within documents in the index. * * Remember that inner index is build based on field, which means each field has one inverted index. * * Fields should be added before any documents are added to the index, fields * that are added after documents are added to the index will only apply to new * documents added to the index. * * @param {String} fieldName The name of the field within the document that should be indexed * @return {elasticlunr.Index} * @memberOf Index */ elasticlunr.Index.prototype.addField = function (fieldName) { this._fields.push(fieldName); this.index[fieldName] = new elasticlunr.InvertedIndex; return this; }; /** * Sets the property used to uniquely identify documents added to the index, * by default this property is 'id'. * * This should only be changed before adding documents to the index, changing * the ref property without resetting the index can lead to unexpected results. * * @param {String} refName The property to use to uniquely identify the * documents in the index. 
* @param {Boolean} emitEvent Whether to emit add events, defaults to true * @return {elasticlunr.Index} * @memberOf Index */ elasticlunr.Index.prototype.setRef = function (refName) { this._ref = refName; return this; }; /** * * Set if the JSON format original documents are save into elasticlunr.DocumentStore * * Defaultly save all the original JSON documents. * * @param {Boolean} save Whether to save the original JSON documents. * @return {elasticlunr.Index} * @memberOf Index */ elasticlunr.Index.prototype.saveDocument = function (save) { this.documentStore = new elasticlunr.DocumentStore(save); return this; }; /** * Add a JSON format document to the index. * * This is the way new documents enter the index, this function will run the * fields from the document through the index's pipeline and then add it to * the index, it will then show up in search results. * * An 'add' event is emitted with the document that has been added and the index * the document has been added to. This event can be silenced by passing false * as the second argument to add. * * @param {Object} doc The JSON format document to add to the index. * @param {Boolean} emitEvent Whether or not to emit events, default true. * @memberOf Index */ elasticlunr.Index.prototype.addDoc = function (doc, emitEvent) { if (!doc) return; var emitEvent = emitEvent === undefined ? 
true : emitEvent; var docRef = doc[this._ref]; this.documentStore.addDoc(docRef, doc); this._fields.forEach(function (field) { var fieldTokens = this.pipeline.run(elasticlunr.tokenizer(doc[field])); this.documentStore.addFieldLength(docRef, field, fieldTokens.length); var tokenCount = {}; fieldTokens.forEach(function (token) { if (token in tokenCount) tokenCount[token] += 1; else tokenCount[token] = 1; }, this); for (var token in tokenCount) { var termFrequency = tokenCount[token]; termFrequency = Math.sqrt(termFrequency); this.index[field].addToken(token, { ref: docRef, tf: termFrequency }); } }, this); if (emitEvent) this.eventEmitter.emit('add', doc, this); }; /** * Removes a document from the index by doc ref. * * To make sure documents no longer show up in search results they can be * removed from the index using this method. * * A 'remove' event is emitted with the document that has been removed and the index * the document has been removed from. This event can be silenced by passing false * as the second argument to remove. * * If user setting DocumentStore not storing the documents, then remove doc by docRef is not allowed. * * @param {String|Integer} docRef The document ref to remove from the index. * @param {Boolean} emitEvent Whether to emit remove events, defaults to true * @memberOf Index */ elasticlunr.Index.prototype.removeDocByRef = function (docRef, emitEvent) { if (!docRef) return; if (this.documentStore.isDocStored() === false) { return; } if (!this.documentStore.hasDoc(docRef)) return; var doc = this.documentStore.getDoc(docRef); this.removeDoc(doc, false); }; /** * Removes a document from the index. * This remove operation could work even the original doc is not store in the DocumentStore. * * To make sure documents no longer show up in search results they can be * removed from the index using this method. * * A 'remove' event is emitted with the document that has been removed and the index * the document has been removed from. 
This event can be silenced by passing false * as the second argument to remove. * * * @param {Object} doc The document ref to remove from the index. * @param {Boolean} emitEvent Whether to emit remove events, defaults to true * @memberOf Index */ elasticlunr.Index.prototype.removeDoc = function (doc, emitEvent) { if (!doc) return; var emitEvent = emitEvent === undefined ? true : emitEvent; var docRef = doc[this._ref]; if (!this.documentStore.hasDoc(docRef)) return; this.documentStore.removeDoc(docRef); this._fields.forEach(function (field) { var fieldTokens = this.pipeline.run(elasticlunr.tokenizer(doc[field])); fieldTokens.forEach(function (token) { this.index[field].removeToken(token, docRef); }, this); }, this); if (emitEvent) this.eventEmitter.emit('remove', doc, this); }; /** * Updates a document in the index. * * When a document contained within the index gets updated, fields changed, * added or removed, to make sure it correctly matched against search queries, * it should be updated in the index. * * This method is just a wrapper around `remove` and `add` * * An 'update' event is emitted with the document that has been updated and the index. * This event can be silenced by passing false as the second argument to update. Only * an update event will be fired, the 'add' and 'remove' events of the underlying calls * are silenced. * * @param {Object} doc The document to update in the index. * @param {Boolean} emitEvent Whether to emit update events, defaults to true * @see Index.prototype.remove * @see Index.prototype.add * @memberOf Index */ elasticlunr.Index.prototype.updateDoc = function (doc, emitEvent) { var emitEvent = emitEvent === undefined ? true : emitEvent; this.removeDocByRef(doc[this._ref], false); this.addDoc(doc, false); if (emitEvent) this.eventEmitter.emit('update', doc, this); }; /** * Calculates the inverse document frequency for a token within the index of a field. * * @param {String} token The token to calculate the idf of. 
* @param {String} field The field to compute idf. * @see Index.prototype.idf * @private * @memberOf Index */ elasticlunr.Index.prototype.idf = function (term, field) { var cacheKey = "@" + field + '/' + term; if (Object.prototype.hasOwnProperty.call(this._idfCache, cacheKey)) return this._idfCache[cacheKey]; var df = this.index[field].getDocFreq(term); var idf = 1 + Math.log(this.documentStore.length / (df + 1)); this._idfCache[cacheKey] = idf; return idf; }; /** * get fields of current index instance * * @return {Array} */ elasticlunr.Index.prototype.getFields = function () { return this._fields.slice(); }; /** * Searches the index using the passed query. * Queries should be a string, multiple words are allowed. * * If config is null, will search all fields defaultly, and lead to OR based query. * If config is specified, will search specified with query time boosting. * * All query tokens are passed through the same pipeline that document tokens * are passed through, so any language processing involved will be run on every * query term. * * Each query term is expanded, so that the term 'he' might be expanded to * 'hello' and 'help' if those terms were already included in the index. * * Matching documents are returned as an array of objects, each object contains * the matching document ref, as set for this index, and the similarity score * for this document against the query. * * @param {String} query The query to search the index with. * @param {JSON} userConfig The user query config, JSON format. 
* @return {Object} * @see Index.prototype.idf * @see Index.prototype.documentVector * @memberOf Index */ elasticlunr.Index.prototype.search = function (query, userConfig) { if (!query) return []; if (typeof query === 'string') { query = {any: query}; } else { query = JSON.parse(JSON.stringify(query)); } var configStr = null; if (userConfig != null) { configStr = JSON.stringify(userConfig); } var config = new elasticlunr.Configuration(configStr, this.getFields()).get(); var queryTokens = {}; var queryFields = Object.keys(query); for (var i = 0; i < queryFields.length; i++) { var key = queryFields[i]; queryTokens[key] = this.pipeline.run(elasticlunr.tokenizer(query[key])); } var queryResults = {}; for (var field in config) { var tokens = queryTokens[field] || queryTokens.any; if (!tokens) { continue; } var fieldSearchResults = this.fieldSearch(tokens, field, config); var fieldBoost = config[field].boost; for (var docRef in fieldSearchResults) { fieldSearchResults[docRef] = fieldSearchResults[docRef] * fieldBoost; } for (var docRef in fieldSearchResults) { if (docRef in queryResults) { queryResults[docRef] += fieldSearchResults[docRef]; } else { queryResults[docRef] = fieldSearchResults[docRef]; } } } var results = []; var result; for (var docRef in queryResults) { result = {ref: docRef, score: queryResults[docRef]}; if (this.documentStore.hasDoc(docRef)) { result.doc = this.documentStore.getDoc(docRef); } results.push(result); } results.sort(function (a, b) { return b.score - a.score; }); return results; }; /** * search queryTokens in specified field. * * @param {Array} queryTokens The query tokens to query in this field. * @param {String} field Field to query in. * @param {elasticlunr.Configuration} config The user query config, JSON format. 
* @return {Object} scores keyed by docRef after coord normalisation;
 *   `undefined` when the boost configured for the field is exactly 0.
 */
elasticlunr.Index.prototype.fieldSearch = function (queryTokens, fieldName, config) {
  var booleanType = config[fieldName].bool;
  var expand = config[fieldName].expand;
  var boost = config[fieldName].boost;
  // accumulated scores over all query tokens; null until the first token is merged
  var scores = null;
  // docRef -> list of original (non-expanded) query tokens found in that doc
  var docTokens = {};

  // Do nothing if the boost is 0
  // (note: this returns undefined, not an empty object)
  if (boost === 0) {
    return;
  }

  queryTokens.forEach(function (token) {
    var tokens = [token];
    if (expand == true) {
      tokens = this.index[fieldName].expandToken(token);
    }
    // Consider every query token in turn. If expanded, each query token
    // corresponds to a set of tokens, which is all tokens in the
    // index matching the pattern queryToken* .
    // For the set of tokens corresponding to a query token, find and score
    // all matching documents. Store those scores in queryTokenScores,
    // keyed by docRef.
    // Then, depending on the value of booleanType, combine the scores
    // for this query token with previous scores. If booleanType is OR,
    // then merge the scores by summing into the accumulated total, adding
    // new document scores as required (effectively a union operator).
    // If booleanType is AND, accumulate scores only if the document
    // has previously been scored by another query token (an intersection
    // operation).
    // Furthermore, since when booleanType is AND, additional
    // query tokens can't add new documents to the result set, use the
    // current document set to limit the processing of each new query
    // token for efficiency (i.e., incremental intersection).

    var queryTokenScores = {};
    tokens.forEach(function (key) {
      var docs = this.index[fieldName].getDocs(key);
      var idf = this.idf(key, fieldName);

      if (scores && booleanType == 'AND') {
        // special case, we can rule out documents that have
        // already been filtered out because they weren't scored
        // by previous query token passes.
        var filteredDocs = {};
        for (var docRef in scores) {
          if (docRef in docs) {
            filteredDocs[docRef] = docs[docRef];
          }
        }
        docs = filteredDocs;
      }
      // Only record the appearance of the original token in the retrieved
      // documents, not of the expanded tokens: coordNorm only cares how many
      // query tokens appear in a document, so adding expanded tokens to
      // docTokens would pollute the coordNorm.
      if (key == token) {
        this.fieldSearchStats(docTokens, key, docs);
      }

      for (var docRef in docs) {
        var tf = this.index[fieldName].getTermFrequency(key, docRef);
        var fieldLength = this.documentStore.getFieldLength(docRef, fieldName);
        // field length normalisation: 1 / sqrt(length), or 1 when the length is 0
        var fieldLengthNorm = 1;
        if (fieldLength != 0) {
          fieldLengthNorm = 1 / Math.sqrt(fieldLength);
        }

        // expanded tokens are down-weighted relative to the original token
        var penality = 1;
        if (key != token) {
          // currently I'm not sure if this penality is enough,
          // need to do verification
          penality = (1 - (key.length - token.length) / key.length) * 0.15;
        }

        var score = tf * idf * fieldLengthNorm * penality;

        if (docRef in queryTokenScores) {
          queryTokenScores[docRef] += score;
        } else {
          queryTokenScores[docRef] = score;
        }
      }
    }, this);

    scores = this.mergeScores(scores, queryTokenScores, booleanType);
  }, this);

  scores = this.coordNorm(scores, docTokens, queryTokens.length);
  return scores;
};

/**
 * Merge the scores from one set of tokens into an accumulated score table.
 * Exact operation depends on the op parameter. If op is 'AND', then only the
 * intersection of the two score lists is retained. Otherwise, the union of
 * the two score lists is returned. For internal use only.
 *
 * @param {Object} accumScores accumulated scores. Should be null on first call.
 * @param {Object} scores new scores to merge into accumScores.
 * @param {String} op merge operation (should be 'AND' or 'OR').
* */ elasticlunr.Index.prototype.mergeScores = function (accumScores, scores, op) { if (!accumScores) { return scores; } if (op == 'AND') { var intersection = {}; for (var docRef in scores) { if (docRef in accumScores) { intersection[docRef] = accumScores[docRef] + scores[docRef]; } } return intersection; } else { for (var docRef in scores) { if (docRef in accumScores) { accumScores[docRef] += scores[docRef]; } else { accumScores[docRef] = scores[docRef]; } } return accumScores; } }; /** * Record the occuring query token of retrieved doc specified by doc field. * Only for inner user. * * @param {Object} docTokens a data structure stores which token appears in the retrieved doc. * @param {String} token query token * @param {Object} docs the retrieved documents of the query token * */ elasticlunr.Index.prototype.fieldSearchStats = function (docTokens, token, docs) { for (var doc in docs) { if (doc in docTokens) { docTokens[doc].push(token); } else { docTokens[doc] = [token]; } } }; /** * coord norm the score of a doc. * if a doc contain more query tokens, then the score will larger than the doc * contains less query tokens. * * only for inner use. * * @param {Object} results first results * @param {Object} docs field search results of a token * @param {Integer} n query token number * @return {Object} */ elasticlunr.Index.prototype.coordNorm = function (scores, docTokens, n) { for (var doc in scores) { if (!(doc in docTokens)) continue; var tokens = docTokens[doc].length; scores[doc] = scores[doc] * tokens / n; } return scores; }; /** * Returns a representation of the index ready for serialisation. 
* * @return {Object} * @memberOf Index */ elasticlunr.Index.prototype.toJSON = function () { var indexJson = {}; this._fields.forEach(function (field) { indexJson[field] = this.index[field].toJSON(); }, this); return { version: elasticlunr.version, fields: this._fields, ref: this._ref, documentStore: this.documentStore.toJSON(), index: indexJson, pipeline: this.pipeline.toJSON() }; }; /** * Applies a plugin to the current index. * * A plugin is a function that is called with the index as its context. * Plugins can be used to customise or extend the behaviour the index * in some way. A plugin is just a function, that encapsulated the custom * behaviour that should be applied to the index. * * The plugin function will be called with the index as its argument, additional * arguments can also be passed when calling use. The function will be called * with the index as its context. * * Example: * * var myPlugin = function (idx, arg1, arg2) { * // `this` is the index to be extended * // apply any extensions etc here. * } * * var idx = elasticlunr(function () { * this.use(myPlugin, 'arg1', 'arg2') * }) * * @param {Function} plugin The plugin to apply. * @memberOf Index */ elasticlunr.Index.prototype.use = function (plugin) { var args = Array.prototype.slice.call(arguments, 1); args.unshift(this); plugin.apply(this, args); }; /*! * elasticlunr.DocumentStore * Copyright (C) 2017 Wei Song */ /** * elasticlunr.DocumentStore is a simple key-value document store used for storing sets of tokens for * documents stored in index. * * elasticlunr.DocumentStore store original JSON format documents that you could build search snippet by this original JSON document. * * user could choose whether original JSON format document should be store, if no configuration then document will be stored defaultly. 
* If user care more about the index size, user could select not store JSON documents, then this will has some defects, such as user * could not use JSON document to generate snippets of search results. * * @param {Boolean} save If the original JSON document should be stored. * @constructor * @module */ elasticlunr.DocumentStore = function (save) { if (save === null || save === undefined) { this._save = true; } else { this._save = save; } this.docs = {}; this.docInfo = {}; this.length = 0; }; /** * Loads a previously serialised document store * * @param {Object} serialisedData The serialised document store to load. * @return {elasticlunr.DocumentStore} */ elasticlunr.DocumentStore.load = function (serialisedData) { var store = new this; store.length = serialisedData.length; store.docs = serialisedData.docs; store.docInfo = serialisedData.docInfo; store._save = serialisedData.save; return store; }; /** * check if current instance store the original doc * * @return {Boolean} */ elasticlunr.DocumentStore.prototype.isDocStored = function () { return this._save; }; /** * Stores the given doc in the document store against the given id. * If docRef already exist, then update doc. * * Document is store by original JSON format, then you could use original document to generate search snippets. * * @param {Integer|String} docRef The key used to store the JSON format doc. * @param {Object} doc The JSON format doc. */ elasticlunr.DocumentStore.prototype.addDoc = function (docRef, doc) { if (!this.hasDoc(docRef)) this.length++; if (this._save === true) { this.docs[docRef] = clone(doc); } else { this.docs[docRef] = null; } }; /** * Retrieves the JSON doc from the document store for a given key. * * If docRef not found, return null. * If user set not storing the documents, return null. * * @param {Integer|String} docRef The key to lookup and retrieve from the document store. 
* @return {Object} * @memberOf DocumentStore */ elasticlunr.DocumentStore.prototype.getDoc = function (docRef) { if (this.hasDoc(docRef) === false) return null; return this.docs[docRef]; }; /** * Checks whether the document store contains a key (docRef). * * @param {Integer|String} docRef The id to look up in the document store. * @return {Boolean} * @memberOf DocumentStore */ elasticlunr.DocumentStore.prototype.hasDoc = function (docRef) { return docRef in this.docs; }; /** * Removes the value for a key in the document store. * * @param {Integer|String} docRef The id to remove from the document store. * @memberOf DocumentStore */ elasticlunr.DocumentStore.prototype.removeDoc = function (docRef) { if (!this.hasDoc(docRef)) return; delete this.docs[docRef]; delete this.docInfo[docRef]; this.length--; }; /** * Add field length of a document's field tokens from pipeline results. * The field length of a document is used to do field length normalization even without the original JSON document stored. * * @param {Integer|String} docRef document's id or reference * @param {String} fieldName field name * @param {Integer} length field length */ elasticlunr.DocumentStore.prototype.addFieldLength = function (docRef, fieldName, length) { if (docRef === null || docRef === undefined) return; if (this.hasDoc(docRef) == false) return; if (!this.docInfo[docRef]) this.docInfo[docRef] = {}; this.docInfo[docRef][fieldName] = length; }; /** * Update field length of a document's field tokens from pipeline results. * The field length of a document is used to do field length normalization even without the original JSON document stored. 
* * @param {Integer|String} docRef document's id or reference * @param {String} fieldName field name * @param {Integer} length field length */ elasticlunr.DocumentStore.prototype.updateFieldLength = function (docRef, fieldName, length) { if (docRef === null || docRef === undefined) return; if (this.hasDoc(docRef) == false) return; this.addFieldLength(docRef, fieldName, length); }; /** * get field length of a document by docRef * * @param {Integer|String} docRef document id or reference * @param {String} fieldName field name * @return {Integer} field length */ elasticlunr.DocumentStore.prototype.getFieldLength = function (docRef, fieldName) { if (docRef === null || docRef === undefined) return 0; if (!(docRef in this.docs)) return 0; if (!(fieldName in this.docInfo[docRef])) return 0; return this.docInfo[docRef][fieldName]; }; /** * Returns a JSON representation of the document store used for serialisation. * * @return {Object} JSON format * @memberOf DocumentStore */ elasticlunr.DocumentStore.prototype.toJSON = function () { return { docs: this.docs, docInfo: this.docInfo, length: this.length, save: this._save }; }; /** * Cloning object * * @param {Object} object in JSON format * @return {Object} copied object */ function clone(obj) { if (null === obj || "object" !== typeof obj) return obj; var copy = obj.constructor(); for (var attr in obj) { if (obj.hasOwnProperty(attr)) copy[attr] = obj[attr]; } return copy; } /*! 
* elasticlunr.stemmer
 * Copyright (C) 2017 Oliver Nightingale
 * Copyright (C) 2017 Wei Song
 * Includes code from - http://tartarus.org/~martin/PorterStemmer/js.txt
 */

/**
 * elasticlunr.stemmer is an english language stemmer, this is a JavaScript
 * implementation of the PorterStemmer taken from http://tartarus.org/~martin
 *
 * The regular expressions are compiled once when the module is loaded; the
 * exported function only applies them.
 *
 * @module
 * @param {String} str The string to stem
 * @return {String}
 * @see elasticlunr.Pipeline
 */
elasticlunr.stemmer = (function(){
  // Step 2 suffix table: suffix matched by re_2 -> replacement appended to the stem.
  var step2list = {
      "ational" : "ate",
      "tional" : "tion",
      "enci" : "ence",
      "anci" : "ance",
      "izer" : "ize",
      "bli" : "ble",
      "alli" : "al",
      "entli" : "ent",
      "eli" : "e",
      "ousli" : "ous",
      "ization" : "ize",
      "ation" : "ate",
      "ator" : "ate",
      "alism" : "al",
      "iveness" : "ive",
      "fulness" : "ful",
      "ousness" : "ous",
      "aliti" : "al",
      "iviti" : "ive",
      "biliti" : "ble",
      "logi" : "log"
    },

    // Step 3 suffix table: suffix matched by re_3 -> replacement appended to the stem.
    step3list = {
      "icate" : "ic",
      "ative" : "",
      "alize" : "al",
      "iciti" : "ic",
      "ical" : "ic",
      "ful" : "",
      "ness" : ""
    },

    c = "[^aeiou]",          // consonant
    v = "[aeiouy]",          // vowel
    C = c + "[^aeiouy]*",    // consonant sequence
    V = v + "[aeiou]*",      // vowel sequence

    mgr0 = "^(" + C + ")?" + V + C,               // [C]VC... is m>0
    meq1 = "^(" + C + ")?" + V + C + "(" + V + ")?$",  // [C]VC[V] is m=1
    mgr1 = "^(" + C + ")?" + V + C + V + C,       // [C]VCVC... is m>1
    s_v = "^(" + C + ")?" + v;                   // vowel in stem

  // Compiled forms of the measure / vowel-in-stem patterns above.
  var re_mgr0 = new RegExp(mgr0);
  var re_mgr1 = new RegExp(mgr1);
  var re_meq1 = new RegExp(meq1);
  var re_s_v = new RegExp(s_v);

  // Suffix patterns, named after the step that uses them.
  var re_1a = /^(.+?)(ss|i)es$/;
  var re2_1a = /^(.+?)([^s])s$/;
  var re_1b = /^(.+?)eed$/;
  var re2_1b = /^(.+?)(ed|ing)$/;
  var re_1b_2 = /.$/;                 // matches the last character (used to drop it)
  var re2_1b_2 = /(at|bl|iz)$/;
  var re3_1b_2 = new RegExp("([^aeiouylsz])\\1$");   // doubled letter other than a, e, i, o, u, y, l, s, z
  var re4_1b_2 = new RegExp("^" + C + v + "[^aeiouwxy]$");

  var re_1c = /^(.+?[^aeiou])y$/;
  var re_2 = /^(.+?)(ational|tional|enci|anci|izer|bli|alli|entli|eli|ousli|ization|ation|ator|alism|iveness|fulness|ousness|aliti|iviti|biliti|logi)$/;

  var re_3 = /^(.+?)(icate|ative|alize|iciti|ical|ful|ness)$/;

  var re_4 = /^(.+?)(al|ance|ence|er|ic|able|ible|ant|ement|ment|ent|ou|ism|ate|iti|ous|ive|ize)$/;
  var re2_4 = /^(.+?)(s|t)(ion)$/;

  var re_5 = /^(.+?)e$/;
  var re_5_1 = /ll$/;
  var re3_5 = new RegExp("^" + C + v + "[^aeiouwxy]$");

  var porterStemmer = function porterStemmer(w) {
    var stem,
      suffix,
      firstch,
      re,
      re2,
      re3,
      re4;

    // words shorter than three characters are returned unchanged
    if (w.length < 3) { return w; }

    // Upper-case a leading "y" so it does not match the lowercase "y" in the
    // vowel patterns; it is turned back to lowercase before returning.
    firstch = w.substr(0,1);
    if (firstch == "y") {
      w = firstch.toUpperCase() + w.substr(1);
    }

    // Step 1a
    // "...sses" -> "...ss", "...ies" -> "...i"; otherwise drop a final "s"
    // that is not preceded by another "s"
    re = re_1a
    re2 = re2_1a;

    if (re.test(w)) { w = w.replace(re,"$1$2"); }
    else if (re2.test(w)) { w = w.replace(re2,"$1$2"); }

    // Step 1b
    re = re_1b;
    re2 = re2_1b;
    if (re.test(w)) {
      // "...eed": drop the last character when the stem before "eed" has m>0
      var fp = re.exec(w);
      re = re_mgr0;
      if (re.test(fp[1])) {
        re = re_1b_2;
        w = w.replace(re,"");
      }
    } else if (re2.test(w)) {
      // "...ed" / "...ing": strip the suffix when the stem contains a vowel,
      // then tidy up the resulting ending
      var fp = re2.exec(w);
      stem = fp[1];
      re2 = re_s_v;
      if (re2.test(stem)) {
        w = stem;
        re2 = re2_1b_2;
        re3 = re3_1b_2;
        re4 = re4_1b_2;
        if (re2.test(w)) { w = w + "e"; }
        else if (re3.test(w)) { re = re_1b_2; w = w.replace(re,""); }
        else if (re4.test(w)) { w = w + "e"; }
      }
    }

    // Step 1c - replace suffix y or Y by i if preceded by a non-vowel which is not the first letter of the word (so cry -> cri, by -> by, say -> say)
    re = re_1c;
    if (re.test(w)) {
      var fp = re.exec(w);
      stem = fp[1];
      w = stem + "i";
    }

    // Step 2
    // replace the suffix via step2list when the stem has m>0
    re = re_2;
    if (re.test(w)) {
      var fp = re.exec(w);
      stem = fp[1];
      suffix = fp[2];
      re = re_mgr0;
      if (re.test(stem)) {
        w = stem + step2list[suffix];
      }
    }

    // Step 3
    // replace the suffix via step3list when the stem has m>0
    re = re_3;
    if (re.test(w)) {
      var fp = re.exec(w);
      stem = fp[1];
      suffix = fp[2];
      re = re_mgr0;
      if (re.test(stem)) {
        w = stem + step3list[suffix];
      }
    }

    // Step 4
    // drop the suffix entirely when the remaining stem has m>1
    re = re_4;
    re2 = re2_4;
    if (re.test(w)) {
      var fp = re.exec(w);
      stem = fp[1];
      re = re_mgr1;
      if (re.test(stem)) {
        w = stem;
      }
    } else if (re2.test(w)) {
      // "...sion" / "...tion": the "s" or "t" stays part of the stem
      var fp = re2.exec(w);
      stem = fp[1] + fp[2];
      re2 = re_mgr1;
      if (re2.test(stem)) {
        w = stem;
      }
    }

    // Step 5
    // drop a final "e" when the stem has m>1, or m=1 and the stem does not
    // match re3_5
    re = re_5;
    if (re.test(w)) {
      var fp = re.exec(w);
      stem = fp[1];
      re = re_mgr1;
      re2 = re_meq1;
      re3 = re3_5;
      if (re.test(stem) || (re2.test(stem) && !(re3.test(stem)))) {
        w = stem;
      }
    }

    // a word ending in "ll" with m>1 loses its last character
    re = re_5_1;
    re2 = re_mgr1;
    if (re.test(w) && re2.test(w)) {
      re = re_1b_2;
      w = w.replace(re,"");
    }

    // and turn initial Y back to y

    if (firstch == "y") {
      w = firstch.toLowerCase() + w.substr(1);
    }

    return w;
  };

  return porterStemmer;
})();

elasticlunr.Pipeline.registerFunction(elasticlunr.stemmer, 'stemmer');
/*!
 * elasticlunr.stopWordFilter
 * Copyright (C) 2017 Oliver Nightingale
 * Copyright (C) 2017 Wei Song
 */

/**
 * elasticlunr.stopWordFilter is an English language stop words filter, any words
 * contained in the stop word list will not be passed through the filter.
 *
 * This is intended to be used in the Pipeline. If the token does not pass the
 * filter then undefined will be returned.
 * Currently this StopwordFilter uses a dictionary for O(1) time complexity stop word filtering.
 *
 * @module
 * @param {String} token The token to pass through the filter
 * @return {String}
 * @see elasticlunr.Pipeline
 */
elasticlunr.stopWordFilter = function (token) {
  // falsy tokens and tokens flagged `true` in stopWords fall through and yield undefined
  if (token && elasticlunr.stopWordFilter.stopWords[token] !== true) {
    return token;
  }
};

/**
 * Remove predefined stop words
 * if user want to use customized stop words, user could use this function to delete
 * all predefined stopwords.
* * @return {null} */ elasticlunr.clearStopWords = function () { elasticlunr.stopWordFilter.stopWords = {}; }; /** * Add customized stop words * user could use this function to add customized stop words * * @params {Array} words customized stop words * @return {null} */ elasticlunr.addStopWords = function (words) { if (words == null || Array.isArray(words) === false) return; words.forEach(function (word) { elasticlunr.stopWordFilter.stopWords[word] = true; }, this); }; /** * Reset to default stop words * user could use this function to restore default stop words * * @return {null} */ elasticlunr.resetStopWords = function () { elasticlunr.stopWordFilter.stopWords = elasticlunr.defaultStopWords; }; elasticlunr.defaultStopWords = { "": true, "a": true, "able": true, "about": true, "across": true, "after": true, "all": true, "almost": true, "also": true, "am": true, "among": true, "an": true, "and": true, "any": true, "are": true, "as": true, "at": true, "be": true, "because": true, "been": true, "but": true, "by": true, "can": true, "cannot": true, "could": true, "dear": true, "did": true, "do": true, "does": true, "either": true, "else": true, "ever": true, "every": true, "for": true, "from": true, "get": true, "got": true, "had": true, "has": true, "have": true, "he": true, "her": true, "hers": true, "him": true, "his": true, "how": true, "however": true, "i": true, "if": true, "in": true, "into": true, "is": true, "it": true, "its": true, "just": true, "least": true, "let": true, "like": true, "likely": true, "may": true, "me": true, "might": true, "most": true, "must": true, "my": true, "neither": true, "no": true, "nor": true, "not": true, "of": true, "off": true, "often": true, "on": true, "only": true, "or": true, "other": true, "our": true, "own": true, "rather": true, "said": true, "say": true, "says": true, "she": true, "should": true, "since": true, "so": true, "some": true, "than": true, "that": true, "the": true, "their": true, "them": true, "then": true, 
"there": true, "these": true, "they": true, "this": true, "tis": true, "to": true, "too": true, "twas": true, "us": true, "wants": true, "was": true, "we": true, "were": true, "what": true, "when": true, "where": true, "which": true, "while": true, "who": true, "whom": true, "why": true, "will": true, "with": true, "would": true, "yet": true, "you": true, "your": true }; elasticlunr.stopWordFilter.stopWords = elasticlunr.defaultStopWords; elasticlunr.Pipeline.registerFunction(elasticlunr.stopWordFilter, 'stopWordFilter'); /*! * elasticlunr.trimmer * Copyright (C) 2017 Oliver Nightingale * Copyright (C) 2017 Wei Song */ /** * elasticlunr.trimmer is a pipeline function for trimming non word * characters from the begining and end of tokens before they * enter the index. * * This implementation may not work correctly for non latin * characters and should either be removed or adapted for use * with languages with non-latin characters. * * @module * @param {String} token The token to pass through the filter * @return {String} * @see elasticlunr.Pipeline */ elasticlunr.trimmer = function (token) { if (token === null || token === undefined) { throw new Error('token should not be undefined'); } return token .replace(/^\W+/, '') .replace(/\W+$/, ''); }; elasticlunr.Pipeline.registerFunction(elasticlunr.trimmer, 'trimmer'); /*! * elasticlunr.InvertedIndex * Copyright (C) 2017 Wei Song * Includes code from - http://tartarus.org/~martin/PorterStemmer/js.txt */ /** * elasticlunr.InvertedIndex is used for efficiently storing and * lookup of documents that contain a given token. * * @constructor */ elasticlunr.InvertedIndex = function () { this.root = { docs: {}, df: 0 }; }; /** * Loads a previously serialised inverted index. * * @param {Object} serialisedData The serialised inverted index to load. 
* @return {elasticlunr.InvertedIndex} */ elasticlunr.InvertedIndex.load = function (serialisedData) { var idx = new this; idx.root = serialisedData.root; return idx; }; /** * Adds a {token: tokenInfo} pair to the inverted index. * If the token already exist, then update the tokenInfo. * * tokenInfo format: { ref: 1, tf: 2} * tokenInfor should contains the document's ref and the tf(token frequency) of that token in * the document. * * By default this function starts at the root of the current inverted index, however * it can start at any node of the inverted index if required. * * @param {String} token * @param {Object} tokenInfo format: { ref: 1, tf: 2} * @param {Object} root An optional node at which to start looking for the * correct place to enter the doc, by default the root of this elasticlunr.InvertedIndex * is used. * @memberOf InvertedIndex */ elasticlunr.InvertedIndex.prototype.addToken = function (token, tokenInfo, root) { var root = root || this.root, idx = 0; while (idx <= token.length - 1) { var key = token[idx]; if (!(key in root)) root[key] = {docs: {}, df: 0}; idx += 1; root = root[key]; } var docRef = tokenInfo.ref; if (!root.docs[docRef]) { // if this doc not exist, then add this doc root.docs[docRef] = {tf: tokenInfo.tf}; root.df += 1; } else { // if this doc already exist, then update tokenInfo root.docs[docRef] = {tf: tokenInfo.tf}; } }; /** * Checks whether a token is in this elasticlunr.InvertedIndex. * * * @param {String} token The token to be checked * @return {Boolean} * @memberOf InvertedIndex */ elasticlunr.InvertedIndex.prototype.hasToken = function (token) { if (!token) return false; var node = this.root; for (var i = 0; i < token.length; i++) { if (!node[token[i]]) return false; node = node[token[i]]; } return true; }; /** * Retrieve a node from the inverted index for a given token. * If token not found in this InvertedIndex, return null. * * * @param {String} token The token to get the node for. 
* @return {Object} * @see InvertedIndex.prototype.get * @memberOf InvertedIndex */ elasticlunr.InvertedIndex.prototype.getNode = function (token) { if (!token) return null; var node = this.root; for (var i = 0; i < token.length; i++) { if (!node[token[i]]) return null; node = node[token[i]]; } return node; }; /** * Retrieve the documents of a given token. * If token not found, return {}. * * * @param {String} token The token to get the documents for. * @return {Object} * @memberOf InvertedIndex */ elasticlunr.InvertedIndex.prototype.getDocs = function (token) { var node = this.getNode(token); if (node == null) { return {}; } return node.docs; }; /** * Retrieve term frequency of given token in given docRef. * If token or docRef not found, return 0. * * * @param {String} token The token to get the documents for. * @param {String|Integer} docRef * @return {Integer} * @memberOf InvertedIndex */ elasticlunr.InvertedIndex.prototype.getTermFrequency = function (token, docRef) { var node = this.getNode(token); if (node == null) { return 0; } if (!(docRef in node.docs)) { return 0; } return node.docs[docRef].tf; }; /** * Retrieve the document frequency of given token. * If token not found, return 0. * * * @param {String} token The token to get the documents for. * @return {Object} * @memberOf InvertedIndex */ elasticlunr.InvertedIndex.prototype.getDocFreq = function (token) { var node = this.getNode(token); if (node == null) { return 0; } return node.df; }; /** * Remove the document identified by document's ref from the token in the inverted index. * * * @param {String} token Remove the document from which token. * @param {String} ref The ref of the document to remove from given token. 
* @memberOf InvertedIndex */ elasticlunr.InvertedIndex.prototype.removeToken = function (token, ref) { if (!token) return; var node = this.getNode(token); if (node == null) return; if (ref in node.docs) { delete node.docs[ref]; node.df -= 1; } }; /** * Find all the possible suffixes of given token using tokens currently in the inverted index. * If token not found, return empty Array. * * @param {String} token The token to expand. * @return {Array} * @memberOf InvertedIndex */ elasticlunr.InvertedIndex.prototype.expandToken = function (token, memo, root) { if (token == null || token == '') return []; var memo = memo || []; if (root == void 0) { root = this.getNode(token); if (root == null) return memo; } if (root.df > 0) memo.push(token); for (var key in root) { if (key === 'docs') continue; if (key === 'df') continue; this.expandToken(token + key, memo, root[key]); } return memo; }; /** * Returns a representation of the inverted index ready for serialisation. * * @return {Object} * @memberOf InvertedIndex */ elasticlunr.InvertedIndex.prototype.toJSON = function () { return { root: this.root }; }; /*! * elasticlunr.Configuration * Copyright (C) 2017 Wei Song */ /** * elasticlunr.Configuration is used to analyze the user search configuration. * * By elasticlunr.Configuration user could set query-time boosting, boolean model in each field. * * Currently configuration supports: * 1. query-time boosting, user could set how to boost each field. * 2. boolean model chosing, user could choose which boolean model to use for each field. * 3. token expandation, user could set token expand to True to improve Recall. Default is False. * * Query time boosting must be configured by field category, "boolean" model could be configured * by both field category or globally as the following example. Field configuration for "boolean" * will overwrite global configuration. * Token expand could be configured both by field category or golbally. 
Local field configuration will * overwrite global configuration. * * configuration example: * { * fields:{ * title: {boost: 2}, * body: {boost: 1} * }, * bool: "OR" * } * * "bool" field configuation overwrite global configuation example: * { * fields:{ * title: {boost: 2, bool: "AND"}, * body: {boost: 1} * }, * bool: "OR" * } * * "expand" example: * { * fields:{ * title: {boost: 2, bool: "AND"}, * body: {boost: 1} * }, * bool: "OR", * expand: true * } * * "expand" example for field category: * { * fields:{ * title: {boost: 2, bool: "AND", expand: true}, * body: {boost: 1} * }, * bool: "OR" * } * * setting the boost to 0 ignores the field (this will only search the title): * { * fields:{ * title: {boost: 1}, * body: {boost: 0} * } * } * * then, user could search with configuration to do query-time boosting. * idx.search('oracle database', {fields: {title: {boost: 2}, body: {boost: 1}}}); * * * @constructor * * @param {String} config user configuration * @param {Array} fields fields of index instance * @module */ elasticlunr.Configuration = function (config, fields) { var config = config || ''; if (fields == undefined || fields == null) { throw new Error('fields should not be null'); } this.config = {}; var userConfig; try { userConfig = JSON.parse(config); this.buildUserConfig(userConfig, fields); } catch (error) { elasticlunr.utils.warn('user configuration parse failed, will use default configuration'); this.buildDefaultConfig(fields); } }; /** * Build default search configuration. * * @param {Array} fields fields of index instance */ elasticlunr.Configuration.prototype.buildDefaultConfig = function (fields) { this.reset(); fields.forEach(function (field) { this.config[field] = { boost: 1, bool: "OR", expand: false }; }, this); }; /** * Build user configuration. 
* * @param {JSON} config User JSON configuratoin * @param {Array} fields fields of index instance */ elasticlunr.Configuration.prototype.buildUserConfig = function (config, fields) { var global_bool = "OR"; var global_expand = false; this.reset(); if ('bool' in config) { global_bool = config['bool'] || global_bool; } if ('expand' in config) { global_expand = config['expand'] || global_expand; } if ('fields' in config) { for (var field in config['fields']) { if (fields.indexOf(field) > -1) { var field_config = config['fields'][field]; var field_expand = global_expand; if (field_config.expand != undefined) { field_expand = field_config.expand; } this.config[field] = { boost: (field_config.boost || field_config.boost === 0) ? field_config.boost : 1, bool: field_config.bool || global_bool, expand: field_expand }; } else { elasticlunr.utils.warn('field name in user configuration not found in index instance fields'); } } } else { this.addAllFields2UserConfig(global_bool, global_expand, fields); } }; /** * Add all fields to user search configuration. * * @param {String} bool Boolean model * @param {String} expand Expand model * @param {Array} fields fields of index instance */ elasticlunr.Configuration.prototype.addAllFields2UserConfig = function (bool, expand, fields) { fields.forEach(function (field) { this.config[field] = { boost: 1, bool: bool, expand: expand }; }, this); }; /** * get current user configuration */ elasticlunr.Configuration.prototype.get = function () { return this.config; }; /** * reset user search configuration. */ elasticlunr.Configuration.prototype.reset = function () { this.config = {}; }; /** * sorted_set.js is added only to make elasticlunr.js compatible with lunr-languages. * if elasticlunr.js support different languages by default, this will make elasticlunr.js * much bigger that not good for browser usage. * */ /*! 
* lunr.SortedSet * Copyright (C) 2017 Oliver Nightingale */ /** * lunr.SortedSets are used to maintain an array of uniq values in a sorted * order. * * @constructor */ lunr.SortedSet = function () { this.length = 0 this.elements = [] } /** * Loads a previously serialised sorted set. * * @param {Array} serialisedData The serialised set to load. * @returns {lunr.SortedSet} * @memberOf SortedSet */ lunr.SortedSet.load = function (serialisedData) { var set = new this set.elements = serialisedData set.length = serialisedData.length return set } /** * Inserts new items into the set in the correct position to maintain the * order. * * @param {Object} The objects to add to this set. * @memberOf SortedSet */ lunr.SortedSet.prototype.add = function () { var i, element for (i = 0; i < arguments.length; i++) { element = arguments[i] if (~this.indexOf(element)) continue this.elements.splice(this.locationFor(element), 0, element) } this.length = this.elements.length } /** * Converts this sorted set into an array. * * @returns {Array} * @memberOf SortedSet */ lunr.SortedSet.prototype.toArray = function () { return this.elements.slice() } /** * Creates a new array with the results of calling a provided function on every * element in this sorted set. * * Delegates to Array.prototype.map and has the same signature. * * @param {Function} fn The function that is called on each element of the * set. * @param {Object} ctx An optional object that can be used as the context * for the function fn. * @returns {Array} * @memberOf SortedSet */ lunr.SortedSet.prototype.map = function (fn, ctx) { return this.elements.map(fn, ctx) } /** * Executes a provided function once per sorted set element. * * Delegates to Array.prototype.forEach and has the same signature. * * @param {Function} fn The function that is called on each element of the * set. * @param {Object} ctx An optional object that can be used as the context * @memberOf SortedSet * for the function fn. 
*/ lunr.SortedSet.prototype.forEach = function (fn, ctx) { return this.elements.forEach(fn, ctx) } /** * Returns the index at which a given element can be found in the * sorted set, or -1 if it is not present. * * @param {Object} elem The object to locate in the sorted set. * @returns {Number} * @memberOf SortedSet */ lunr.SortedSet.prototype.indexOf = function (elem) { var start = 0, end = this.elements.length, sectionLength = end - start, pivot = start + Math.floor(sectionLength / 2), pivotElem = this.elements[pivot] while (sectionLength > 1) { if (pivotElem === elem) return pivot if (pivotElem < elem) start = pivot if (pivotElem > elem) end = pivot sectionLength = end - start pivot = start + Math.floor(sectionLength / 2) pivotElem = this.elements[pivot] } if (pivotElem === elem) return pivot return -1 } /** * Returns the position within the sorted set that an element should be * inserted at to maintain the current order of the set. * * This function assumes that the element to search for does not already exist * in the sorted set. * * @param {Object} elem The elem to find the position for in the set * @returns {Number} * @memberOf SortedSet */ lunr.SortedSet.prototype.locationFor = function (elem) { var start = 0, end = this.elements.length, sectionLength = end - start, pivot = start + Math.floor(sectionLength / 2), pivotElem = this.elements[pivot] while (sectionLength > 1) { if (pivotElem < elem) start = pivot if (pivotElem > elem) end = pivot sectionLength = end - start pivot = start + Math.floor(sectionLength / 2) pivotElem = this.elements[pivot] } if (pivotElem > elem) return pivot if (pivotElem < elem) return pivot + 1 } /** * Creates a new lunr.SortedSet that contains the elements in the intersection * of this set and the passed set. * * @param {lunr.SortedSet} otherSet The set to intersect with this set. 
* @returns {lunr.SortedSet} * @memberOf SortedSet */ lunr.SortedSet.prototype.intersect = function (otherSet) { var intersectSet = new lunr.SortedSet, i = 0, j = 0, a_len = this.length, b_len = otherSet.length, a = this.elements, b = otherSet.elements while (true) { if (i > a_len - 1 || j > b_len - 1) break if (a[i] === b[j]) { intersectSet.add(a[i]) i++, j++ continue } if (a[i] < b[j]) { i++ continue } if (a[i] > b[j]) { j++ continue } }; return intersectSet } /** * Makes a copy of this set * * @returns {lunr.SortedSet} * @memberOf SortedSet */ lunr.SortedSet.prototype.clone = function () { var clone = new lunr.SortedSet clone.elements = this.toArray() clone.length = clone.elements.length return clone } /** * Creates a new lunr.SortedSet that contains the elements in the union * of this set and the passed set. * * @param {lunr.SortedSet} otherSet The set to union with this set. * @returns {lunr.SortedSet} * @memberOf SortedSet */ lunr.SortedSet.prototype.union = function (otherSet) { var longSet, shortSet, unionSet if (this.length >= otherSet.length) { longSet = this, shortSet = otherSet } else { longSet = otherSet, shortSet = this } unionSet = longSet.clone() for(var i = 0, shortSetElements = shortSet.toArray(); i < shortSetElements.length; i++){ unionSet.add(shortSetElements[i]) } return unionSet } /** * Returns a representation of the sorted set ready for serialisation. * * @returns {Array} * @memberOf SortedSet */ lunr.SortedSet.prototype.toJSON = function () { return this.toArray() } /** * export the module via AMD, CommonJS or as a browser global * Export code from https://github.com/umdjs/umd/blob/master/returnExports.js */ ;(function (root, factory) { if (typeof define === 'function' && define.amd) { // AMD. Register as an anonymous module. define(factory) } else if (typeof exports === 'object') { /** * Node. Does not work with strict CommonJS, but * only CommonJS-like enviroments that support module.exports, * like Node. 
*/ module.exports = factory() } else { // Browser globals (root is window) root.elasticlunr = factory() } }(this, function () { /** * Just return a value to define the module export. * This example returns an object, but the module * can return a function as the exported value. */ return elasticlunr })) })();