diff --git a/build.xml b/build.xml index c6f4b519efa..5bcb16a1609 100644 --- a/build.xml +++ b/build.xml @@ -19,7 +19,7 @@ information: Portions Copyright [yyyy] [name of copyright owner] CDDL HEADER END Copyright (c) 2005, 2018, Oracle and/or its affiliates. All rights reserved. -Portions Copyright (c) 2017, Chris Fraire . +Portions Copyright (c) 2017-2018, Chris Fraire . --> . + + + + + + + + + + + + @@ -518,6 +530,10 @@ Portions Copyright (c) 2017, Chris Fraire . + + + + @@ -535,6 +551,14 @@ Portions Copyright (c) 2017, Chris Fraire . dest="lib/${lucene-core.jar}" verbose="true" usetimestamp="true"/> + + + + . +Portions Copyright (c) 2017-2018, Chris Fraire . --> . org.apache.lucene lucene-analyzers-common + + org.apache.lucene + lucene-highlighter + + + org.apache.lucene + lucene-join + + + org.apache.lucene + lucene-memory + + + org.apache.lucene + lucene-queries + org.apache.lucene lucene-queryparser diff --git a/opengrok-web-nbproject/nbproject/build-impl.xml b/opengrok-web-nbproject/nbproject/build-impl.xml index 2a54d532751..2f7fcc9119d 100644 --- a/opengrok-web-nbproject/nbproject/build-impl.xml +++ b/opengrok-web-nbproject/nbproject/build-impl.xml @@ -991,6 +991,10 @@ exists or setup the property manually. For example like this: + + + + @@ -1004,6 +1008,10 @@ exists or setup the property manually. 
For example like this: + + + + diff --git a/opengrok-web-nbproject/nbproject/project.properties b/opengrok-web-nbproject/nbproject/project.properties index 0ecc27a0a78..64d5a966cdd 100644 --- a/opengrok-web-nbproject/nbproject/project.properties +++ b/opengrok-web-nbproject/nbproject/project.properties @@ -40,10 +40,18 @@ j2ee.platform.classpath=${j2ee.server.home}/lib/annotations-api.jar:${j2ee.serve lucene.version=7.2.1 lucene-core.jar=lucene-core-${lucene.version}.jar lucene-analyzers-common.jar=lucene-analyzers-common-${lucene.version}.jar +lucene-highlighter.jar=lucene-highlighter-${lucene.version}.jar +lucene-join.jar=lucene-join-${lucene.version}.jar +lucene-memory.jar=lucene-memory-${lucene.version}.jar +lucene-queries.jar=lucene-queries-${lucene.version}.jar lucene-queryparser.jar=lucene-queryparser-${lucene.version}.jar lucene-suggest.jar=lucene-suggest-${lucene.version}.jar file.reference.lucene-core.jar=../lib/${lucene-core.jar} file.reference.lucene-analyzers-common.jar=../lib/${lucene-analyzers-common.jar} +file.reference.lucene-highlighter.jar=../lib/${lucene-highlighter.jar} +file.reference.lucene-join.jar=../lib/${lucene-join.jar} +file.reference.lucene-memory.jar=../lib/${lucene-memory.jar} +file.reference.lucene-queries.jar=../lib/${lucene-queries.jar} file.reference.lucene-queryparser.jar=../lib/${lucene-queryparser.jar} file.reference.lucene-suggest.jar=../lib/${lucene-suggest.jar} file.reference.ant.jar=${ant.library.dir}/ant.jar @@ -67,6 +75,10 @@ javac.classpath=\ ${file.reference.bcel.jar}:\ ${file.reference.lucene-core.jar}:\ ${file.reference.lucene-analyzers-common.jar}:\ + ${file.reference.lucene-highlighter.jar}:\ + ${file.reference.lucene-join.jar}:\ + ${file.reference.lucene-memory.jar}:\ + ${file.reference.lucene-queries.jar}:\ ${file.reference.lucene-queryparser.jar}:\ ${file.reference.lucene-suggest.jar}:\ ${file.reference.json-simple-1.1.1.jar} diff --git a/opengrok-web-nbproject/nbproject/project.xml 
b/opengrok-web-nbproject/nbproject/project.xml index 7dde145615c..b53b1f6f5ab 100644 --- a/opengrok-web-nbproject/nbproject/project.xml +++ b/opengrok-web-nbproject/nbproject/project.xml @@ -30,6 +30,22 @@ ${file.reference.lucene-analyzers-common.jar} WEB-INF/lib + + ${file.reference.lucene-highlighter.jar} + WEB-INF/lib + + + ${file.reference.lucene-join.jar} + WEB-INF/lib + + + ${file.reference.lucene-memory.jar} + WEB-INF/lib + + + ${file.reference.lucene-queries.jar} + WEB-INF/lib + ${file.reference.lucene-queryparser.jar} WEB-INF/lib diff --git a/pom.xml b/pom.xml index 97af2ef6491..8a6b4d665cb 100644 --- a/pom.xml +++ b/pom.xml @@ -19,6 +19,7 @@ information: Portions Copyright [yyyy] [name of copyright owner] CDDL HEADER END Copyright (c) 2010, 2017, Oracle and/or its affiliates. All rights reserved. +Portions Copyright (c) 2018, Chris Fraire . --> lucene-analyzers-common ${lucene.version} + + org.apache.lucene + lucene-highlighter + ${lucene.version} + + + org.apache.lucene + lucene-join + ${lucene.version} + + + org.apache.lucene + lucene-memory + ${lucene.version} + + + org.apache.lucene + lucene-queries + ${lucene.version} + org.apache.lucene lucene-queryparser diff --git a/src/org/opensolaris/opengrok/analysis/AnalyzerGuru.java b/src/org/opensolaris/opengrok/analysis/AnalyzerGuru.java index ac2f165a693..9c129bf84d7 100644 --- a/src/org/opensolaris/opengrok/analysis/AnalyzerGuru.java +++ b/src/org/opensolaris/opengrok/analysis/AnalyzerGuru.java @@ -19,7 +19,7 @@ /* * Copyright (c) 2005, 2017, Oracle and/or its affiliates. All rights reserved. - * Portions Copyright (c) 2017, Chris Fraire . + * Portions Copyright (c) 2017-2018, Chris Fraire . 
*/ package org.opensolaris.opengrok.analysis; @@ -213,6 +213,13 @@ public class AnalyzerGuru { private static final Map fileTypeDescriptions = new TreeMap<>(); + /** + * Maps from {@link FileAnalyzer#getFileTypeName()} to + * {@link FileAnalyzerFactory} + */ + private static final Map FILETYPE_FACTORIES = + new HashMap<>(); + /* * If you write your own analyzer please register it here. The order is * important for any factory that uses a FileAnalyzerFactory.Matcher @@ -338,6 +345,9 @@ private static void registerAnalyzer(FileAnalyzerFactory factory) { } matchers.addAll(factory.getMatchers()); factories.add(factory); + + FileAnalyzer fa = factory.getAnalyzer(); + FILETYPE_FACTORIES.put(fa.getFileTypeName(), factory); } /** @@ -385,6 +395,17 @@ public static FileAnalyzer getAnalyzer() { return DEFAULT_ANALYZER_FACTORY.getAnalyzer(); } + /** + * Gets an analyzer for the specified {@code fileTypeName} if it accords + * with a known {@link FileAnalyzer#getFileTypeName()}. + * @param fileTypeName a defined name + * @return a defined instance if known or otherwise {@code null} + */ + public static FileAnalyzer getAnalyzer(String fileTypeName) { + FileAnalyzerFactory factory = FILETYPE_FACTORIES.get(fileTypeName); + return factory == null ? null : factory.getAnalyzer(); + } + /** * Get an analyzer suited to analyze a file. This function will reuse * analyzers since they are costly. 
@@ -531,6 +552,9 @@ public static void writeXref(FileAnalyzerFactory factory, Reader in, args.setProject(project); FileAnalyzer analyzer = factory.getAnalyzer(); + RuntimeEnvironment env = RuntimeEnvironment.getInstance(); + analyzer.setScopesEnabled(env.isScopesEnabled()); + analyzer.setFoldingEnabled(env.isFoldingEnabled()); analyzer.writeXref(args); } diff --git a/src/org/opensolaris/opengrok/analysis/CtagsReader.java b/src/org/opensolaris/opengrok/analysis/CtagsReader.java index c85b5c5f531..b04a3f13a71 100644 --- a/src/org/opensolaris/opengrok/analysis/CtagsReader.java +++ b/src/org/opensolaris/opengrok/analysis/CtagsReader.java @@ -378,8 +378,8 @@ private static String cutPattern(String tagLine, int startTab, int endTab) { * Adds a tag to a {@code Definitions} instance. */ private void addTag(Definitions defs, int lineno, String symbol, - String type, String text, String namespace, String signature, - int lineStart, int lineEnd) { + String type, String text, String namespace, String signature, + int lineStart, int lineEnd) { // The strings are frequently repeated (a symbol can be used in // multiple definitions, multiple definitions can have the same type, // one line can contain multiple definitions). Intern them to minimize @@ -397,9 +397,7 @@ private void addTag(Definitions defs, int lineno, String symbol, * syntax. * @return a defined instance */ - private CpatIndex bestIndexOfTag(int lineno, String whole, - String str) { - + private CpatIndex bestIndexOfTag(int lineno, String whole, String str) { if (whole.length() < 1) { return new CpatIndex(lineno, 0, 1, true); } diff --git a/src/org/opensolaris/opengrok/analysis/OGKTextField.java b/src/org/opensolaris/opengrok/analysis/OGKTextField.java new file mode 100644 index 00000000000..887583b280f --- /dev/null +++ b/src/org/opensolaris/opengrok/analysis/OGKTextField.java @@ -0,0 +1,72 @@ +/* + * This work is licensed under the Creative Commons Attribution-ShareAlike 3.0 + * United States License. 
To view a copy of this license, visit + * http://creativecommons.org/licenses/by-sa/3.0/us/ or send a letter to + * Creative Commons, PO Box 1866, Mountain View, CA 94042, USA. + * + * Copyright (c) 2012, https://stackoverflow.com/users/1270457/amas + * Copyright (c) 2012, https://stackoverflow.com/questions/11945728/how-to-use-termvector-lucene-4-0 + * Portions Copyright (c) 2018, Chris Fraire . + */ + +package org.opensolaris.opengrok.analysis; + +import java.io.Reader; +import org.apache.lucene.analysis.TokenStream; +import org.apache.lucene.document.Field; +import org.apache.lucene.document.FieldType; +import org.apache.lucene.index.IndexOptions; + +/** + * Represents an OpenGrok-customized tokenized text field to centralize settings + * across all the analyzers. + */ +public class OGKTextField extends Field { + + /** Indexed, tokenized, not stored. */ + public static final FieldType TYPE_NOT_STORED = new FieldType(); + + /** Indexed, tokenized, stored. */ + public static final FieldType TYPE_STORED = new FieldType(); + + static { + TYPE_NOT_STORED.setIndexOptions( + IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS); + TYPE_NOT_STORED.setTokenized(true); + TYPE_NOT_STORED.freeze(); + + TYPE_STORED.setIndexOptions( + IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS); + TYPE_STORED.setTokenized(true); + TYPE_STORED.setStored(true); + TYPE_STORED.freeze(); + } + + /** + * Creates a new un-stored instance with {@link Reader} value. + */ + public OGKTextField(String name, Reader reader) { + super(name, reader, TYPE_NOT_STORED); + } + + /** + * Creates a new instance with {@link Reader} value. + */ + public OGKTextField(String name, Reader reader, Store store) { + super(name, reader, store == Store.YES ? TYPE_STORED : TYPE_NOT_STORED); + } + + /** + * Creates a new instance with {@code String} value. + */ + public OGKTextField(String name, String value, Store store) { + super(name, value, store == Store.YES ? 
TYPE_STORED : TYPE_NOT_STORED); + } + + /** + * Creates a new un-stored instance with {@link TokenStream} value. + */ + public OGKTextField(String name, TokenStream stream) { + super(name, stream, TYPE_NOT_STORED); + } +} diff --git a/src/org/opensolaris/opengrok/analysis/OGKTextVecField.java b/src/org/opensolaris/opengrok/analysis/OGKTextVecField.java new file mode 100644 index 00000000000..2384f2acc90 --- /dev/null +++ b/src/org/opensolaris/opengrok/analysis/OGKTextVecField.java @@ -0,0 +1,74 @@ +/* + * This work is licensed under the Creative Commons Attribution-ShareAlike 3.0 + * United States License. To view a copy of this license, visit + * http://creativecommons.org/licenses/by-sa/3.0/us/ or send a letter to + * Creative Commons, PO Box 1866, Mountain View, CA 94042, USA. + * + * Copyright (c) 2012, https://stackoverflow.com/users/1270457/amas + * Copyright (c) 2012, https://stackoverflow.com/questions/11945728/how-to-use-termvector-lucene-4-0 + * Portions Copyright (c) 2018, Chris Fraire . + */ + +package org.opensolaris.opengrok.analysis; + +import java.io.Reader; +import org.apache.lucene.analysis.TokenStream; +import org.apache.lucene.document.Field; +import org.apache.lucene.document.FieldType; +import org.apache.lucene.index.IndexOptions; + +/** + * Represents an OpenGrok-customized tokenized, text field with stored term + * vectors to centralize settings across all the analyzers. + */ +public class OGKTextVecField extends Field { + + /** Indexed, tokenized, not stored. */ + public static final FieldType TYPE_NOT_STORED = new FieldType(); + + /** Indexed, tokenized, stored. 
*/ + public static final FieldType TYPE_STORED = new FieldType(); + + static { + TYPE_NOT_STORED.setIndexOptions( + IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS); + TYPE_NOT_STORED.setTokenized(true); + TYPE_NOT_STORED.setStoreTermVectors(true); + TYPE_NOT_STORED.freeze(); + + TYPE_STORED.setIndexOptions( + IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS); + TYPE_STORED.setTokenized(true); + TYPE_STORED.setStoreTermVectors(true); + TYPE_STORED.setStored(true); + TYPE_STORED.freeze(); + } + + /** + * Creates a new un-stored instance with {@link Reader} value. + */ + public OGKTextVecField(String name, Reader reader) { + super(name, reader, TYPE_NOT_STORED); + } + + /** + * Creates a new instance with {@link Reader} value. + */ + public OGKTextVecField(String name, Reader reader, Store store) { + super(name, reader, store == Store.YES ? TYPE_STORED : TYPE_NOT_STORED); + } + + /** + * Creates a new instance with {@code String} value. + */ + public OGKTextVecField(String name, String value, Store store) { + super(name, value, store == Store.YES ? TYPE_STORED : TYPE_NOT_STORED); + } + + /** + * Creates a new un-stored instance with {@link TokenStream} value. + */ + public OGKTextVecField(String name, TokenStream stream) { + super(name, stream, TYPE_NOT_STORED); + } +} diff --git a/src/org/opensolaris/opengrok/analysis/archive/TarAnalyzer.java b/src/org/opensolaris/opengrok/analysis/archive/TarAnalyzer.java index af014554601..8da143ea515 100644 --- a/src/org/opensolaris/opengrok/analysis/archive/TarAnalyzer.java +++ b/src/org/opensolaris/opengrok/analysis/archive/TarAnalyzer.java @@ -19,6 +19,7 @@ /* * Copyright (c) 2005, 2013, Oracle and/or its affiliates. All rights reserved. + * Portions Copyright (c) 2018, Chris Fraire . 
*/ package org.opensolaris.opengrok.analysis.archive; @@ -26,13 +27,14 @@ import java.io.Writer; import java.util.ArrayList; import org.apache.lucene.document.Document; -import org.apache.lucene.document.TextField; import org.apache.tools.tar.TarEntry; import org.apache.tools.tar.TarInputStream; import org.opensolaris.opengrok.analysis.FileAnalyzer; import org.opensolaris.opengrok.analysis.FileAnalyzerFactory; import org.opensolaris.opengrok.analysis.IteratorReader; import org.opensolaris.opengrok.analysis.StreamSource; +import org.opensolaris.opengrok.analysis.OGKTextField; +import org.opensolaris.opengrok.search.QueryBuilder; import org.opensolaris.opengrok.web.Util; /** @@ -62,6 +64,6 @@ public void analyze(Document doc, StreamSource src, Writer xrefOut) throws IOExc } } - doc.add(new TextField("full", new IteratorReader(names))); + doc.add(new OGKTextField(QueryBuilder.FULL, new IteratorReader(names))); } } diff --git a/src/org/opensolaris/opengrok/analysis/archive/ZipAnalyzer.java b/src/org/opensolaris/opengrok/analysis/archive/ZipAnalyzer.java index 2e09de62cdc..ecc0d93fc00 100644 --- a/src/org/opensolaris/opengrok/analysis/archive/ZipAnalyzer.java +++ b/src/org/opensolaris/opengrok/analysis/archive/ZipAnalyzer.java @@ -19,6 +19,7 @@ /* * Copyright (c) 2005, 2013, Oracle and/or its affiliates. All rights reserved. + * Portions Copyright (c) 2018, Chris Fraire . 
*/ package org.opensolaris.opengrok.analysis.archive; @@ -28,11 +29,12 @@ import java.util.zip.ZipEntry; import java.util.zip.ZipInputStream; import org.apache.lucene.document.Document; -import org.apache.lucene.document.TextField; import org.opensolaris.opengrok.analysis.FileAnalyzer; import org.opensolaris.opengrok.analysis.FileAnalyzerFactory; import org.opensolaris.opengrok.analysis.IteratorReader; import org.opensolaris.opengrok.analysis.StreamSource; +import org.opensolaris.opengrok.analysis.OGKTextField; +import org.opensolaris.opengrok.search.QueryBuilder; import org.opensolaris.opengrok.web.Util; /** @@ -62,6 +64,6 @@ public void analyze(Document doc, StreamSource src, Writer xrefOut) throws IOExc } } - doc.add(new TextField("full", new IteratorReader(names))); + doc.add(new OGKTextField(QueryBuilder.FULL, new IteratorReader(names))); } } diff --git a/src/org/opensolaris/opengrok/analysis/document/MandocAnalyzer.java b/src/org/opensolaris/opengrok/analysis/document/MandocAnalyzer.java index e5a5ef55e96..6b013ec4286 100644 --- a/src/org/opensolaris/opengrok/analysis/document/MandocAnalyzer.java +++ b/src/org/opensolaris/opengrok/analysis/document/MandocAnalyzer.java @@ -19,7 +19,7 @@ /* * Copyright (c) 2005, 2017, Oracle and/or its affiliates. All rights reserved. - * Portions Copyright (c) 2017, Chris Fraire . + * Portions Copyright (c) 2017-2018, Chris Fraire . 
*/ package org.opensolaris.opengrok.analysis.document; @@ -27,12 +27,12 @@ import java.io.Reader; import java.io.Writer; import org.apache.lucene.document.Document; -import org.apache.lucene.document.TextField; import org.opensolaris.opengrok.analysis.FileAnalyzer; import org.opensolaris.opengrok.analysis.FileAnalyzerFactory; import org.opensolaris.opengrok.analysis.JFlexTokenizer; import org.opensolaris.opengrok.analysis.StreamSource; import org.opensolaris.opengrok.analysis.TextAnalyzer; +import org.opensolaris.opengrok.analysis.OGKTextField; import org.opensolaris.opengrok.analysis.WriteXrefArgs; import org.opensolaris.opengrok.analysis.Xrefer; import org.opensolaris.opengrok.search.QueryBuilder; @@ -58,7 +58,8 @@ public void analyze(Document doc, StreamSource src, Writer xrefOut) // this is to explicitly use appropriate analyzers tokenstream to // workaround #1376 symbols search works like full text search this.symbolTokenizer.setReader(getReader(src.getStream())); - TextField full = new TextField(QueryBuilder.FULL, symbolTokenizer); + OGKTextField full = new OGKTextField(QueryBuilder.FULL, + symbolTokenizer); doc.add(full); if (xrefOut != null) { diff --git a/src/org/opensolaris/opengrok/analysis/document/TroffAnalyzer.java b/src/org/opensolaris/opengrok/analysis/document/TroffAnalyzer.java index 700c1c93712..befd4475a19 100644 --- a/src/org/opensolaris/opengrok/analysis/document/TroffAnalyzer.java +++ b/src/org/opensolaris/opengrok/analysis/document/TroffAnalyzer.java @@ -19,7 +19,7 @@ /* * Copyright (c) 2005, 2017, Oracle and/or its affiliates. All rights reserved. - * Portions Copyright (c) 2017, Chris Fraire . + * Portions Copyright (c) 2017-2018, Chris Fraire . 
*/ package org.opensolaris.opengrok.analysis.document; @@ -27,13 +27,13 @@ import java.io.Reader; import java.io.Writer; import org.apache.lucene.document.Document; -import org.apache.lucene.document.TextField; import org.opensolaris.opengrok.analysis.FileAnalyzer; import org.opensolaris.opengrok.analysis.FileAnalyzerFactory; import org.opensolaris.opengrok.analysis.JFlexTokenizer; import org.opensolaris.opengrok.analysis.JFlexXref; import org.opensolaris.opengrok.analysis.StreamSource; import org.opensolaris.opengrok.analysis.TextAnalyzer; +import org.opensolaris.opengrok.analysis.OGKTextField; import org.opensolaris.opengrok.analysis.WriteXrefArgs; import org.opensolaris.opengrok.analysis.Xrefer; import org.opensolaris.opengrok.search.QueryBuilder; @@ -58,7 +58,8 @@ protected TroffAnalyzer(FileAnalyzerFactory factory) { public void analyze(Document doc, StreamSource src, Writer xrefOut) throws IOException { //this is to explicitly use appropriate analyzers tokenstream to workaround #1376 symbols search works like full text search this.symbolTokenizer.setReader(getReader(src.getStream())); - TextField full = new TextField(QueryBuilder.FULL, symbolTokenizer); + OGKTextField full = new OGKTextField(QueryBuilder.FULL, + symbolTokenizer); doc.add(full); if (xrefOut != null) { diff --git a/src/org/opensolaris/opengrok/analysis/executables/ELFAnalyzer.java b/src/org/opensolaris/opengrok/analysis/executables/ELFAnalyzer.java index 2cf1e2d750d..6c56fc6d8b1 100644 --- a/src/org/opensolaris/opengrok/analysis/executables/ELFAnalyzer.java +++ b/src/org/opensolaris/opengrok/analysis/executables/ELFAnalyzer.java @@ -19,6 +19,7 @@ /* * Copyright (c) 2005, 2017, Oracle and/or its affiliates. All rights reserved. + * Portions Copyright (c) 2018, Chris Fraire . 
*/ package org.opensolaris.opengrok.analysis.executables; @@ -36,11 +37,12 @@ import org.apache.lucene.document.Document; import org.apache.lucene.document.Field.Store; -import org.apache.lucene.document.TextField; import org.opensolaris.opengrok.analysis.FileAnalyzer; import org.opensolaris.opengrok.analysis.FileAnalyzerFactory; import org.opensolaris.opengrok.analysis.StreamSource; +import org.opensolaris.opengrok.analysis.OGKTextField; import org.opensolaris.opengrok.logger.LoggerFactory; +import org.opensolaris.opengrok.search.QueryBuilder; import org.opensolaris.opengrok.web.Util; /** @@ -82,7 +84,7 @@ public void analyze(Document doc, StreamSource src, Writer xrefOut) throws IOExc } if (content != null && !content.isEmpty()) { - doc.add(new TextField("full", content, Store.NO)); + doc.add(new OGKTextField(QueryBuilder.FULL, content, Store.NO)); if (xrefOut != null) { xrefOut.append(""); Util.htmlize(content, xrefOut); diff --git a/src/org/opensolaris/opengrok/analysis/executables/JarAnalyzer.java b/src/org/opensolaris/opengrok/analysis/executables/JarAnalyzer.java index 5ce5f63c734..23e8f6a53c3 100644 --- a/src/org/opensolaris/opengrok/analysis/executables/JarAnalyzer.java +++ b/src/org/opensolaris/opengrok/analysis/executables/JarAnalyzer.java @@ -30,11 +30,11 @@ import java.util.zip.ZipInputStream; import org.apache.lucene.document.Document; import org.apache.lucene.document.Field.Store; -import org.apache.lucene.document.TextField; import org.opensolaris.opengrok.analysis.AnalyzerGuru; import org.opensolaris.opengrok.analysis.FileAnalyzer; import org.opensolaris.opengrok.analysis.FileAnalyzerFactory; import org.opensolaris.opengrok.analysis.StreamSource; +import org.opensolaris.opengrok.analysis.OGKTextField; import org.opensolaris.opengrok.search.QueryBuilder; import org.opensolaris.opengrok.web.Util; @@ -78,7 +78,7 @@ public void analyze(Document doc, StreamSource src, Writer xrefOut) throws IOExc * is stored here (Store.YES) for analyzer convenience. 
*/ String fstr = fout.toString(); - doc.add(new TextField(QueryBuilder.FULL, fstr, Store.YES)); + doc.add(new OGKTextField(QueryBuilder.FULL, fstr, Store.YES)); fout.setLength(0); FileAnalyzerFactory fac = AnalyzerGuru.find(ename); diff --git a/src/org/opensolaris/opengrok/analysis/executables/JavaClassAnalyzer.java b/src/org/opensolaris/opengrok/analysis/executables/JavaClassAnalyzer.java index 101a5d0df7f..0877376244e 100644 --- a/src/org/opensolaris/opengrok/analysis/executables/JavaClassAnalyzer.java +++ b/src/org/opensolaris/opengrok/analysis/executables/JavaClassAnalyzer.java @@ -25,7 +25,6 @@ import java.io.IOException; import java.io.InputStream; -import java.io.StringReader; import java.io.StringWriter; import java.io.Writer; import java.util.ArrayList; @@ -59,10 +58,11 @@ import org.apache.bcel.classfile.Utility; import org.apache.lucene.document.Document; import org.apache.lucene.document.Field.Store; -import org.apache.lucene.document.TextField; import org.opensolaris.opengrok.analysis.FileAnalyzer; import org.opensolaris.opengrok.analysis.FileAnalyzerFactory; import org.opensolaris.opengrok.analysis.IteratorReader; +import org.opensolaris.opengrok.analysis.OGKTextField; +import org.opensolaris.opengrok.analysis.OGKTextVecField; import org.opensolaris.opengrok.analysis.StreamSource; import org.opensolaris.opengrok.configuration.RuntimeEnvironment; import org.opensolaris.opengrok.logger.LoggerFactory; @@ -141,13 +141,13 @@ void analyze(Document doc, InputStream in, Writer xrefOut) throws IOException { */ String dstr = dout.toString(); - doc.add(new TextField(QueryBuilder.DEFS, dstr, Store.YES)); + doc.add(new OGKTextField(QueryBuilder.DEFS, dstr, Store.YES)); String rstr = rout.toString(); - doc.add(new TextField(QueryBuilder.REFS, rstr, Store.YES)); + doc.add(new OGKTextField(QueryBuilder.REFS, rstr, Store.YES)); String fstr = fout.toString(); - doc.add(new TextField(QueryBuilder.FULL, fstr, Store.YES)); + doc.add(new OGKTextField(QueryBuilder.FULL, 
fstr, Store.YES)); } diff --git a/src/org/opensolaris/opengrok/analysis/plain/DefinitionsTokenStream.java b/src/org/opensolaris/opengrok/analysis/plain/DefinitionsTokenStream.java index e7976a90076..0de06723857 100644 --- a/src/org/opensolaris/opengrok/analysis/plain/DefinitionsTokenStream.java +++ b/src/org/opensolaris/opengrok/analysis/plain/DefinitionsTokenStream.java @@ -66,7 +66,7 @@ public class DefinitionsTokenStream extends TokenStream { * @throws IOException if I/O error occurs */ public void initialize(Definitions defs, StreamSource src, - ReaderWrapper wrapper) throws IOException { + ReaderWrapper wrapper) throws IOException { if (defs == null) { throw new IllegalArgumentException("`defs' is null"); } diff --git a/src/org/opensolaris/opengrok/analysis/plain/PlainAnalyzer.java b/src/org/opensolaris/opengrok/analysis/plain/PlainAnalyzer.java index b0ad590b34a..4cc48c6b461 100644 --- a/src/org/opensolaris/opengrok/analysis/plain/PlainAnalyzer.java +++ b/src/org/opensolaris/opengrok/analysis/plain/PlainAnalyzer.java @@ -27,19 +27,22 @@ import java.io.InputStream; import java.io.Reader; import java.io.Writer; +import java.util.logging.Logger; import org.apache.lucene.document.Document; import org.apache.lucene.document.StoredField; -import org.apache.lucene.document.TextField; import org.opensolaris.opengrok.analysis.Definitions; import org.opensolaris.opengrok.analysis.ExpandTabsReader; import org.opensolaris.opengrok.analysis.FileAnalyzerFactory; import org.opensolaris.opengrok.analysis.JFlexTokenizer; import org.opensolaris.opengrok.analysis.JFlexXref; +import org.opensolaris.opengrok.analysis.OGKTextField; +import org.opensolaris.opengrok.analysis.OGKTextVecField; import org.opensolaris.opengrok.analysis.Scopes; import org.opensolaris.opengrok.analysis.StreamSource; import org.opensolaris.opengrok.analysis.TextAnalyzer; import org.opensolaris.opengrok.analysis.WriteXrefArgs; import org.opensolaris.opengrok.analysis.Xrefer; +import 
org.opensolaris.opengrok.logger.LoggerFactory; import org.opensolaris.opengrok.search.QueryBuilder; import org.opensolaris.opengrok.util.NullWriter; @@ -88,16 +91,16 @@ public void analyze(Document doc, StreamSource src, Writer xrefOut) throws IOException, InterruptedException { Definitions defs = null; - doc.add(new TextField(QueryBuilder.FULL, getReader(src.getStream()))); + doc.add(new OGKTextField(QueryBuilder.FULL, + getReader(src.getStream()))); + String fullpath = doc.get(QueryBuilder.FULLPATH); if (fullpath != null && ctags != null) { defs = ctags.doCtags(fullpath); if (defs != null && defs.numberOfSymbols() > 0) { - DefinitionsTokenStream defstream = new DefinitionsTokenStream(); - defstream.initialize(defs, src, null); - doc.add(new TextField(QueryBuilder.DEFS, defstream)); + tryAddingDefs(doc, defs, src, fullpath); //this is to explicitly use appropriate analyzers tokenstream to workaround #1376 symbols search works like full text search - TextField ref = new TextField(QueryBuilder.REFS, + OGKTextField ref = new OGKTextField(QueryBuilder.REFS, this.symbolTokenizer); this.symbolTokenizer.setReader(getReader(src.getStream())); doc.add(ref); @@ -134,4 +137,39 @@ public void analyze(Document doc, StreamSource src, Writer xrefOut) } } } + + private void tryAddingDefs(Document doc, Definitions defs, StreamSource src, + String fullpath) throws IOException { + + DefinitionsTokenStream defstream = new DefinitionsTokenStream(); + defstream.initialize(defs, src, (reader) -> wrapReader(reader)); + + /** + * Testing showed that UnifiedHighlighter will fall back to + * ANALYSIS in the presence of multi-term queries (MTQs) such as + * prefixes and wildcards even for fields that are analyzed with + * POSTINGS -- i.e. with DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS. 
+ * This is despite UnifiedHighlighter seeming to indicate that + * postings should be sufficient in the comment for + * shouldHandleMultiTermQuery(String): "MTQ highlighting can be + * expensive, particularly when using offsets in postings." + * DEFS re-analysis will not be correct, however, as the + * PlainAnalyzer which UnifiedHighlighter will use on-the-fly will + * not correctly integrate ctags Definitions. + * Storing term vectors, however, allows UnifiedHighlighter to + * avoid re-analysis at the cost of a larger index. As DEFS are a + * small subset of source text, it seems worth the cost to get + * accurate highlighting for DEFS MTQs. + */ + doc.add(new OGKTextVecField(QueryBuilder.DEFS, defstream)); + } + + /** + * Identical to {@link #getReader(java.io.InputStream)} but overlaying an + * existing stream. + * @see #getReader(java.io.InputStream) + */ + private Reader wrapReader(Reader reader) { + return ExpandTabsReader.wrap(reader, project); + } } diff --git a/src/org/opensolaris/opengrok/analysis/plain/XMLAnalyzer.java b/src/org/opensolaris/opengrok/analysis/plain/XMLAnalyzer.java index e7bac5cdc5a..b176b9339f1 100644 --- a/src/org/opensolaris/opengrok/analysis/plain/XMLAnalyzer.java +++ b/src/org/opensolaris/opengrok/analysis/plain/XMLAnalyzer.java @@ -19,7 +19,7 @@ /* * Copyright (c) 2005, 2014, Oracle and/or its affiliates. All rights reserved. - * Portions Copyright (c) 2017, Chris Fraire . + * Portions Copyright (c) 2017-2018, Chris Fraire . 
*/ package org.opensolaris.opengrok.analysis.plain; @@ -27,13 +27,14 @@ import java.io.Reader; import java.io.Writer; import org.apache.lucene.document.Document; -import org.apache.lucene.document.TextField; import org.opensolaris.opengrok.analysis.FileAnalyzerFactory; import org.opensolaris.opengrok.analysis.JFlexXref; import org.opensolaris.opengrok.analysis.StreamSource; import org.opensolaris.opengrok.analysis.TextAnalyzer; +import org.opensolaris.opengrok.analysis.OGKTextField; import org.opensolaris.opengrok.analysis.WriteXrefArgs; import org.opensolaris.opengrok.analysis.Xrefer; +import org.opensolaris.opengrok.search.QueryBuilder; /** * Analyzes HTML files Created on September 30, 2005 @@ -52,7 +53,8 @@ protected XMLAnalyzer(FileAnalyzerFactory factory) { @Override public void analyze(Document doc, StreamSource src, Writer xrefOut) throws IOException { - doc.add(new TextField("full", getReader(src.getStream()))); + doc.add(new OGKTextField(QueryBuilder.FULL, + getReader(src.getStream()))); if (xrefOut != null) { try (Reader in = getReader(src.getStream())) { diff --git a/src/org/opensolaris/opengrok/analysis/uue/UuencodeAnalyzer.java b/src/org/opensolaris/opengrok/analysis/uue/UuencodeAnalyzer.java index abc7d9da163..a842044d7e9 100644 --- a/src/org/opensolaris/opengrok/analysis/uue/UuencodeAnalyzer.java +++ b/src/org/opensolaris/opengrok/analysis/uue/UuencodeAnalyzer.java @@ -19,7 +19,7 @@ /* * Copyright (c) 2005, 2017, Oracle and/or its affiliates. All rights reserved. - * Portions Copyright (c) 2017, Chris Fraire . + * Portions Copyright (c) 2017-2018, Chris Fraire . 
*/ package org.opensolaris.opengrok.analysis.uue; @@ -27,13 +27,13 @@ import java.io.Reader; import java.io.Writer; import org.apache.lucene.document.Document; -import org.apache.lucene.document.TextField; import org.opensolaris.opengrok.analysis.FileAnalyzer; import org.opensolaris.opengrok.analysis.FileAnalyzerFactory; import org.opensolaris.opengrok.analysis.JFlexTokenizer; import org.opensolaris.opengrok.analysis.JFlexXref; import org.opensolaris.opengrok.analysis.StreamSource; import org.opensolaris.opengrok.analysis.TextAnalyzer; +import org.opensolaris.opengrok.analysis.OGKTextField; import org.opensolaris.opengrok.analysis.WriteXrefArgs; import org.opensolaris.opengrok.search.QueryBuilder; @@ -56,7 +56,8 @@ protected UuencodeAnalyzer(FileAnalyzerFactory factory) { @Override public void analyze(Document doc, StreamSource src, Writer xrefOut) throws IOException { //this is to explicitly use appropriate analyzers tokenstream to workaround #1376 symbols search works like full text search - TextField full = new TextField(QueryBuilder.FULL, symbolTokenizer); + OGKTextField full = new OGKTextField(QueryBuilder.FULL, + this.symbolTokenizer); this.symbolTokenizer.setReader(getReader(src.getStream())); doc.add(full); diff --git a/src/org/opensolaris/opengrok/configuration/Configuration.java b/src/org/opensolaris/opengrok/configuration/Configuration.java index d0c753824a6..b764eb5bee4 100644 --- a/src/org/opensolaris/opengrok/configuration/Configuration.java +++ b/src/org/opensolaris/opengrok/configuration/Configuration.java @@ -17,7 +17,7 @@ * CDDL HEADER END */ - /* +/* * Copyright (c) 2007, 2017, Oracle and/or its affiliates. All rights reserved. * Portions Copyright (c) 2017-2018, Chris Fraire . */ @@ -92,6 +92,13 @@ public final class Configuration { * value. */ private static final String NEGATIVE_NUMBER_ERROR = "Invalid value for \"%s\" - \"%s\". 
Expected value greater or equal than 0"; + /** + * Error string for non-positive numbers (could be int, double, long, ...). + * First argument is the name of the property, second argument is the actual + * value. + */ + private static final String NONPOSITIVE_NUMBER_ERROR = + "Invalid value for \"%s\" - \"%s\". Expected value greater than 0"; private String ctags; @@ -186,6 +193,8 @@ public final class Configuration { private boolean tagsEnabled; private int hitsPerPage; private int cachePages; + private short contextLimit; // initialized non-zero in ctor + private short contextSurround; private boolean lastEditedDisplayMode; private String CTagsExtraOptionsFile; private int scanningDepth; @@ -389,6 +398,8 @@ public Configuration() { setCachePages(5); setCommandTimeout(600); // 10 minutes setCompressXref(true); + setContextLimit((short)10); + //contextSurround is default(short) //ctags is default(String) setCurrentIndexedCollapseThreshold(27); setDataRoot(null); @@ -544,6 +555,52 @@ public void setMandoc(String value) { this.mandoc = value; } + /** + * Gets the total number of context lines per file to show in cases where it + * is limited. Default is 10. + * @return a value greater than zero + */ + public short getContextLimit() { + return contextLimit; + } + + /** + * Sets the total number of context lines per file to show in cases where it + * is limited. + * @param value a value greater than zero + * @throws IllegalArgumentException if {@code value} is not positive + */ + public void setContextLimit(short value) throws IllegalArgumentException { + if (value < 1) { + throw new IllegalArgumentException( + String.format(NONPOSITIVE_NUMBER_ERROR, "contextLimit", value)); + } + this.contextLimit = value; + } + + /** + * Gets the number of context lines to show before or after any match. + * Default is zero. 
+ * @return a value greater than or equal to zero + */ + public short getContextSurround() { + return contextSurround; + } + + /** + * Sets the number of context lines to show before or after any match. + * @param value a value greater than or equal to zero + * @throws IllegalArgumentException if {@code value} is negative + */ + public void setContextSurround(short value) + throws IllegalArgumentException { + if (value < 0) { + throw new IllegalArgumentException( + String.format(NEGATIVE_NUMBER_ERROR, "contextSurround", value)); + } + this.contextSurround = value; + } + public int getCachePages() { return cachePages; } diff --git a/src/org/opensolaris/opengrok/configuration/RuntimeEnvironment.java b/src/org/opensolaris/opengrok/configuration/RuntimeEnvironment.java index d700ea10d6f..6ac5f85c567 100644 --- a/src/org/opensolaris/opengrok/configuration/RuntimeEnvironment.java +++ b/src/org/opensolaris/opengrok/configuration/RuntimeEnvironment.java @@ -136,7 +136,20 @@ public final class RuntimeEnvironment { private int messagesInTheSystem = 0; private Statistics statistics = new Statistics(); - + + /** + * Stores a transient value when + * {@link #setContextLimit(java.lang.Short)} is called -- i.e. the + * value is not mediated to {@link Configuration}. + */ + private Short contextLimit; + /** + * Stores a transient value when + * {@link #setContextSurround(java.lang.Short)} is called -- i.e. the + * value is not mediated to {@link Configuration}. + */ + private Short contextSurround; + private static final IndexTimestamp indexTime = new IndexTimestamp(); /** @@ -1288,7 +1301,64 @@ public boolean getListDirsFirst() { public void setListDirsFirst(boolean flag) { threadConfig.get().setListDirsFirst(flag); } - + + /** + * Gets the total number of context lines per file to show: either the last + * value passed successfully to {@link #setContextLimit(java.lang.Short)} + * or {@link Configuration#getContextLimit()} as a default. 
+ * @return a value greater than zero + */ + public short getContextLimit() { + return contextLimit != null ? contextLimit : + threadConfig.get().getContextLimit(); + } + + /** + * Sets the total number of context lines per file to show, or resets to use + * {@link Configuration#getContextLimit()}. + *

+ * N.b. the value is not mediated to {@link Configuration}. + * @param value a defined value or {@code null} to reset to use the + * {@link Configuration#getContextSurround()} + * @throws IllegalArgumentException if {@code value} is not positive + */ + public void setContextLimit(Short value) + throws IllegalArgumentException { + if (value < 1) { + throw new IllegalArgumentException("value is not positive"); + } + contextLimit = value; + } + + /** + * Gets the number of context lines to show before or after any match: + * either the last value passed successfully to + * {@link #setContextSurround(java.lang.Short)} or + * {@link Configuration#getContextSurround()} as a default. + * @return a value greater than or equal to zero + */ + public short getContextSurround() { + return contextSurround != null ? contextSurround : + threadConfig.get().getContextSurround(); + } + + /** + * Sets the number of context lines to show before or after any match, or + * resets to use {@link Configuration#getContextSurround()}. + *

+ * N.b. the value is not mediated to {@link Configuration}. + * @param value a defined value or {@code null} to reset to use the + * {@link Configuration#getContextSurround()} + * @throws IllegalArgumentException if {@code value} is negative + */ + public void setContextSurround(Short value) + throws IllegalArgumentException { + if (value < 0) { + throw new IllegalArgumentException("value is negative"); + } + contextSurround = value; + } + /** * Read an configuration file and set it as the current configuration. * diff --git a/src/org/opensolaris/opengrok/index/IndexAnalysisSettingsAccessor.java b/src/org/opensolaris/opengrok/index/IndexAnalysisSettingsAccessor.java index b573c12f9b9..2b478d37ea5 100644 --- a/src/org/opensolaris/opengrok/index/IndexAnalysisSettingsAccessor.java +++ b/src/org/opensolaris/opengrok/index/IndexAnalysisSettingsAccessor.java @@ -55,12 +55,28 @@ public class IndexAnalysisSettingsAccessor { /** * Searches for a document with a {@link QueryBuilder#OBJUID} value matching - * {@link #INDEX_ANALYSIS_SETTINGS_OBJUID}. + * {@link #INDEX_ANALYSIS_SETTINGS_OBJUID}. The first document found is + * returned, so this should not be called with a MultiReader. * @param reader a defined instance * @return a defined instance or {@code null} if none could be found * @throws IOException if I/O error occurs while searching Lucene */ public IndexAnalysisSettings read(IndexReader reader) throws IOException { + IndexAnalysisSettings[] res = read(reader, 1); + return res.length > 0 ? res[0] : null; + } + + /** + * Searches for documents with a {@link QueryBuilder#OBJUID} value matching + * {@link #INDEX_ANALYSIS_SETTINGS_OBJUID}. + * @param reader a defined instance + * @param n a limit to the number of documents returned. The method may + * return less. 
+ * @return a defined instance, which is empty if none could be found + * @throws IOException if I/O error occurs while searching Lucene + */ + public IndexAnalysisSettings[] read(IndexReader reader, int n) + throws IOException { IndexSearcher searcher = new IndexSearcher(reader); Query q; try { @@ -70,20 +86,24 @@ public IndexAnalysisSettings read(IndexReader reader) throws IOException { // This is not expected, so translate to RuntimeException. throw new RuntimeException(ex); } - TopDocs top = searcher.search(q, 1); - if (top.totalHits < 1) { - return null; - } + TopDocs top = searcher.search(q, n); - Document doc = searcher.doc(top.scoreDocs[0].doc); - IndexableField objser = doc.getField(QueryBuilder.OBJSER); - try { - return objser == null ? null : IndexAnalysisSettings.deserialize( - objser.binaryValue().bytes); - } catch (ClassNotFoundException ex) { - // This is not expected, so translate to RuntimeException. - throw new RuntimeException(ex); + int nres = top.totalHits > n ? n : (int)top.totalHits; + IndexAnalysisSettings[] res = new IndexAnalysisSettings[nres]; + + for (int i = 0; i < nres; ++i) { + Document doc = searcher.doc(top.scoreDocs[i].doc); + IndexableField objser = doc.getField(QueryBuilder.OBJSER); + try { + res[i] = objser == null ? null : + IndexAnalysisSettings.deserialize( + objser.binaryValue().bytes); + } catch (ClassNotFoundException ex) { + // This is not expected, so translate to RuntimeException. 
+ throw new RuntimeException(ex); + } } + return res; } /** diff --git a/src/org/opensolaris/opengrok/index/IndexDatabase.java b/src/org/opensolaris/opengrok/index/IndexDatabase.java index 773fbf41933..b8191493c96 100644 --- a/src/org/opensolaris/opengrok/index/IndexDatabase.java +++ b/src/org/opensolaris/opengrok/index/IndexDatabase.java @@ -50,6 +50,8 @@ import java.util.zip.GZIPOutputStream; import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.analysis.standard.StandardAnalyzer; +import org.apache.lucene.codecs.lucene50.Lucene50StoredFieldsFormat; +import org.apache.lucene.codecs.lucene70.Lucene70Codec; import org.apache.lucene.document.DateTools; import org.apache.lucene.document.Document; import org.apache.lucene.document.Field; @@ -395,6 +397,13 @@ public void update(IndexerParallelizer parallelizer) IndexWriterConfig iwc = new IndexWriterConfig(analyzer); iwc.setOpenMode(OpenMode.CREATE_OR_APPEND); iwc.setRAMBufferSizeMB(env.getRamBufferSize()); + /** + * Most data in OpenGrok is indexed but not stored, so use the best + * compression on the minority of data that is stored, since it + * should not have a detrimental impact on overall throughput. 
+ */ + iwc.setCodec(new Lucene70Codec( + Lucene50StoredFieldsFormat.Mode.BEST_COMPRESSION)); writer = new IndexWriter(indexDirectory, iwc); writer.commit(); // to make sure index exists on the disk completer = new PendingFileCompleter(); diff --git a/src/org/opensolaris/opengrok/search/QueryBuilder.java b/src/org/opensolaris/opengrok/search/QueryBuilder.java index 01e575f04c7..798ced92cd0 100644 --- a/src/org/opensolaris/opengrok/search/QueryBuilder.java +++ b/src/org/opensolaris/opengrok/search/QueryBuilder.java @@ -30,6 +30,7 @@ import java.security.NoSuchAlgorithmException; import java.util.ArrayList; import java.util.Collections; +import java.util.List; import java.util.Map; import java.util.TreeMap; import org.apache.lucene.queryparser.classic.ParseException; @@ -80,6 +81,22 @@ public class QueryBuilder { */ private final Map queries = new TreeMap<>(); + /** + * Sets the instance to the state of {@code other}. + * @param other a defined instance + * @return {@code this} + */ + public QueryBuilder reset(QueryBuilder other) { + if (other == null) { + throw new IllegalArgumentException("other is null"); + } + if (this != other) { + queries.clear(); + queries.putAll(other.queries); + } + return this; + } + /** * Set search string for the "full" field. * @@ -253,6 +270,39 @@ public Map getQueries() { return Collections.unmodifiableMap(queries); } + /** + * Gets a list of fields from {@link #getQueries()} which are extracted + * from source text and which therefore can be used for context + * presentations -- in the order of most specific to least. + * @return a defined, possibly-empty list + */ + public List getContextFields() { + List fields = new ArrayList<>(queries.size()); + /** + * setFreetext() allows query fragments that specify a field name with + * a colon (e.g., "defs:ensure_cache" in the "Full Search" box), so the + * context fields (i.e., the result of this method) are not just the + * keys of `queries' but need a full parsing to be determined. 
+ */ + Query query; + try { + query = build(); + } catch (ParseException ex) { + return fields; + } + String queryString = query.toString(""); + if (queryString.contains(DEFS + ":")) { + fields.add(DEFS); + } + if (queryString.contains(REFS + ":")) { + fields.add(REFS); + } + if (queryString.contains(FULL + ":")) { + fields.add(FULL); + } + return fields; + } + /** * Get the number of query fields set. * diff --git a/src/org/opensolaris/opengrok/search/Results.java b/src/org/opensolaris/opengrok/search/Results.java index 752a55d2f92..22cc1a0a737 100644 --- a/src/org/opensolaris/opengrok/search/Results.java +++ b/src/org/opensolaris/opengrok/search/Results.java @@ -82,9 +82,11 @@ private Results() { * @throws CorruptIndexException * @throws IOException */ - private static Map> createMap(IndexSearcher searcher, ScoreDoc[] hits, int startIdx, long stopIdx) + private static Map> createMap( + IndexSearcher searcher, ScoreDoc[] hits, int startIdx, long stopIdx) throws CorruptIndexException, IOException { - LinkedHashMap> dirHash = + + LinkedHashMap> dirHash = new LinkedHashMap<>(); for (int i = startIdx; i < stopIdx; i++) { int docId = hits[i].doc; @@ -96,12 +98,12 @@ private static Map> createMap(IndexSearcher searcher } String parent = rpath.substring(0, rpath.lastIndexOf('/')); - ArrayList dirDocs = dirHash.get(parent); + ArrayList dirDocs = dirHash.get(parent); if (dirDocs == null) { dirDocs = new ArrayList<>(); dirHash.put(parent, dirDocs); } - dirDocs.add(doc); + dirDocs.add(docId); } return dirHash; } @@ -167,7 +169,9 @@ public static void prettyPrint(Writer out, SearchHelper sh, int start, String xrefPrefixE = ctxE + Prefix.XREF_P; File xrefDataDir = new File(sh.dataRoot, Prefix.XREF_P.toString()); - for (Map.Entry> entry : + RuntimeEnvironment env = RuntimeEnvironment.getInstance(); + + for (Map.Entry> entry : createMap(sh.searcher, sh.hits, start, end).entrySet()) { String parent = entry.getKey(); out.write("!"); out.write(""); } + + int tabSize = 
sh.getTabSize(p); + PrintPlainFinalArgs fargs = new PrintPlainFinalArgs(out, sh, env, + xrefPrefix, tabSize, morePrefix); + out.write(""); - for (Document doc : entry.getValue()) { + for (int docId : entry.getValue()) { + Document doc = sh.searcher.doc(docId); String rpath = doc.get(QueryBuilder.PATH); String rpathE = Util.URIEncodePath(rpath); DateFormat df; @@ -202,7 +212,7 @@ public static void prettyPrint(Writer out, SearchHelper sh, int start, out.write(xrefPrefixE); out.write(rpathE); out.write("\""); - if (RuntimeEnvironment.getInstance().isLastEditedDisplayMode()) { + if (env.isLastEditedDisplayMode()) { try { // insert last edited date if possible df = DateFormat.getDateTimeInstance(DateFormat.SHORT, DateFormat.SHORT); @@ -222,18 +232,6 @@ public static void prettyPrint(Writer out, SearchHelper sh, int start, out.write(""); if (sh.sourceContext != null) { Genre genre = Genre.get(doc.get("t")); - Definitions tags = null; - IndexableField tagsField = doc.getField(QueryBuilder.TAGS); - if (tagsField != null) { - tags = Definitions.deserialize(tagsField.binaryValue().bytes); - } - Scopes scopes; - IndexableField scopesField = doc.getField(QueryBuilder.SCOPES); - if (scopesField != null) { - scopes = Scopes.deserialize(scopesField.binaryValue().bytes); - } else { - scopes = new Scopes(); - } if (Genre.XREFABLE == genre && sh.summarizer != null) { String xtags = getTags(xrefDataDir, rpath, sh.compressed); // FIXME use Highlighter from lucene contrib here, @@ -243,14 +241,8 @@ public static void prettyPrint(Writer out, SearchHelper sh, int start, } else if (Genre.HTML == genre && sh.summarizer != null) { String htags = getTags(sh.sourceRoot, rpath, false); out.write(sh.summarizer.getSummary(htags).toString()); - } else { - // SRCROOT is read with UTF-8 as a default. - Reader r = genre == Genre.PLAIN ? 
- IOUtils.createBOMStrippedReader( - new FileInputStream(new File(sh.sourceRoot, rpath)), - StandardCharsets.UTF_8.name()) : null; - sh.sourceContext.getContext(r, out, xrefPrefix, morePrefix, - rpath, tags, true, sh.builder.isDefSearch(), null, scopes); + } else if (genre == Genre.PLAIN) { + printPlain(fargs, doc, docId, rpath); } } @@ -263,7 +255,66 @@ public static void prettyPrint(Writer out, SearchHelper sh, int start, } } + private static void printPlain(PrintPlainFinalArgs fargs, Document doc, + int docId, String rpath) throws ClassNotFoundException, IOException { + + fargs.shelp.sourceContext.toggleAlt(); + + boolean didPresentNew = fargs.shelp.sourceContext.getContext2(fargs.env, + fargs.shelp.searcher, docId, fargs.out, fargs.xrefPrefix, + fargs.morePrefix, true, fargs.tabSize); + + if (!didPresentNew) { + /** + * Fall back to the old view, which re-analyzes text using + * PlainLinetokenizer. E.g., when source code is updated (thus + * affecting timestamps) but re-indexing is not yet complete. + */ + Definitions tags = null; + IndexableField tagsField = doc.getField(QueryBuilder.TAGS); + if (tagsField != null) { + tags = Definitions.deserialize(tagsField.binaryValue().bytes); + } + Scopes scopes; + IndexableField scopesField = doc.getField(QueryBuilder.SCOPES); + if (scopesField != null) { + scopes = Scopes.deserialize(scopesField.binaryValue().bytes); + } else { + scopes = new Scopes(); + } + boolean isDefSearch = fargs.shelp.builder.isDefSearch(); + // SRCROOT is read with UTF-8 as a default. 
+ try (Reader r = IOUtils.createBOMStrippedReader(new FileInputStream( + new File(fargs.shelp.sourceRoot, rpath)), + StandardCharsets.UTF_8.name())) { + fargs.shelp.sourceContext.getContext(r, fargs.out, + fargs.xrefPrefix, fargs.morePrefix, rpath, tags, true, + isDefSearch, null, scopes); + } + } + } + private static String htmlize(String raw) { return Util.htmlize(raw); } + + private static class PrintPlainFinalArgs { + final Writer out; + final SearchHelper shelp; + final RuntimeEnvironment env; + final String xrefPrefix; + final String morePrefix; + final int tabSize; + + public PrintPlainFinalArgs(Writer out, SearchHelper shelp, + RuntimeEnvironment env, String xrefPrefix, int tabSize, + String morePrefix) { + this.out = out; + this.shelp = shelp; + this.env = env; + this.xrefPrefix = xrefPrefix; + this.morePrefix = morePrefix; + this.tabSize = tabSize; + } + } } diff --git a/src/org/opensolaris/opengrok/search/SearchEngine.java b/src/org/opensolaris/opengrok/search/SearchEngine.java index 565550b3d36..4ca46e35bb7 100644 --- a/src/org/opensolaris/opengrok/search/SearchEngine.java +++ b/src/org/opensolaris/opengrok/search/SearchEngine.java @@ -19,6 +19,7 @@ /* * Copyright (c) 2005, 2017, Oracle and/or its affiliates. All rights reserved. + * Portions Copyright (c) 2018, Chris Fraire . */ package org.opensolaris.opengrok.search; @@ -43,6 +44,7 @@ import org.apache.lucene.analysis.charfilter.HTMLStripCharFilter; import org.apache.lucene.document.Document; import org.apache.lucene.index.DirectoryReader; +import org.apache.lucene.index.Fields; import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.IndexableField; import org.apache.lucene.index.MultiReader; @@ -122,6 +124,7 @@ public class SearchEngine { * Holds value of property indexDatabase. 
*/ private Query query; + private QueryBuilder queryBuilder; private final CompatibleAnalyser analyzer = new CompatibleAnalyser(); private Context sourceContext; private HistoryContext historyContext; @@ -234,8 +237,39 @@ private void searchMultiDatabase(List root, boolean paging) throws IOEx } } + /** + * Gets the instance from {@code search(...)} if it was called. + * @return defined instance or {@code null} + */ public String getQuery() { - return query.toString(); + return query != null ? query.toString() : null; + } + + /** + * Gets the instance from {@code search(...)} if it was called. + * @return defined instance or {@code null} + */ + public Query getQueryObject() { + return query; + } + + /** + * Gets the builder from {@code search(...)} if it was called. + *

+ * (Modifying the builder will have no effect on this + * {@link SearchEngine}.) + * @return defined instance or {@code null} + */ + public QueryBuilder getQueryBuilder() { + return queryBuilder; + } + + /** + * Gets the searcher from {@code search(...)} if it was called. + * @return defined instance or {@code null} + */ + public IndexSearcher getSearcher() { + return searcher; } /** @@ -329,10 +363,9 @@ private int search(List projects, File root) { data = RuntimeEnvironment.getInstance().getDataRootPath(); docs.clear(); - QueryBuilder queryBuilder = createQueryBuilder(); - + QueryBuilder newBuilder = createQueryBuilder(); try { - query = queryBuilder.build(); + query = newBuilder.build(); if (query != null) { if (projects.isEmpty()) { @@ -357,7 +390,7 @@ private int search(List projects, File root) { sourceContext = null; summarizer = null; try { - sourceContext = new Context(query, queryBuilder.getQueries()); + sourceContext = new Context(query, newBuilder); if (sourceContext.isEmpty()) { sourceContext = null; } @@ -377,9 +410,32 @@ private int search(List projects, File root) { } } int count = hits == null ? 0 : hits.length; + queryBuilder = newBuilder; return count; } + /** + * Gets the queried score docs from {@code search(...)} if it was called. + * @return a defined instance if a query succeeded, or {@code null} + */ + public ScoreDoc[] scoreDocs() { + return hits; + } + + /** + * Gets the document of the specified {@code docId} from + * {@code search(...)} if it was called. + * @return a defined instance if a query succeeded + * @throws java.io.IOException if an error occurs obtaining the Lucene + * document by ID + */ + public Document doc(int docId) throws IOException { + if (searcher == null) { + throw new IllegalStateException("search(...) 
did not succeed"); + } + return searcher.doc(docId); + } + /** * get results , if no search was started before, no results are returned * this method will requery if end is more than first query from search, @@ -449,6 +505,7 @@ public void results(int start, int end, List ret) { int nhits = docs.size(); if (sourceContext != null) { + sourceContext.toggleAlt(); try { if (Genre.PLAIN == genre && (source != null)) { // SRCROOT is read with UTF-8 as a default. diff --git a/src/org/opensolaris/opengrok/search/context/Context.java b/src/org/opensolaris/opengrok/search/context/Context.java index b50f5feebaa..a584921827d 100644 --- a/src/org/opensolaris/opengrok/search/context/Context.java +++ b/src/org/opensolaris/opengrok/search/context/Context.java @@ -19,8 +19,8 @@ /* * Copyright (c) 2005, 2015, Oracle and/or its affiliates. All rights reserved. - * * Portions Copyright 2011 Jens Elkner. + * Portions Copyright (c) 2018, Chris Fraire . */ /** @@ -32,6 +32,7 @@ import java.io.IOException; import java.io.Reader; import java.io.Writer; +import java.util.Arrays; import java.util.HashMap; import java.util.List; import java.util.Map; @@ -39,13 +40,19 @@ import java.util.logging.Level; import java.util.logging.Logger; +import org.apache.lucene.document.Document; +import org.apache.lucene.index.IndexableField; +import org.apache.lucene.search.IndexSearcher; import org.apache.lucene.search.Query; import org.opensolaris.opengrok.analysis.Definitions; +import org.opensolaris.opengrok.analysis.FileAnalyzer; import org.opensolaris.opengrok.analysis.Scopes; import org.opensolaris.opengrok.analysis.Scopes.Scope; +import org.opensolaris.opengrok.analysis.plain.PlainAnalyzerFactory; import org.opensolaris.opengrok.configuration.RuntimeEnvironment; import org.opensolaris.opengrok.logger.LoggerFactory; import org.opensolaris.opengrok.search.Hit; +import org.opensolaris.opengrok.search.QueryBuilder; import org.opensolaris.opengrok.util.IOUtils; import org.opensolaris.opengrok.web.Util; @@ 
-53,6 +60,8 @@ public class Context { private static final Logger LOGGER = LoggerFactory.getLogger(Context.class); + private final Query query; + private final QueryBuilder qbuilder; private final LineMatcher[] m; static final int MAXFILEREAD = 1024 * 1024; private char[] buffer; @@ -64,34 +73,163 @@ public class Context { * whose values tell if the field is case insensitive (true for * insensitivity, false for sensitivity). */ - private static final Map tokenFields = + private static final Map TOKEN_FIELDS = new HashMap(); static { - tokenFields.put("full", Boolean.TRUE); - tokenFields.put("refs", Boolean.FALSE); - tokenFields.put("defs", Boolean.FALSE); + TOKEN_FIELDS.put(QueryBuilder.FULL, Boolean.TRUE); + TOKEN_FIELDS.put(QueryBuilder.REFS, Boolean.FALSE); + TOKEN_FIELDS.put(QueryBuilder.DEFS, Boolean.FALSE); } /** - * Constructs a context generator + * Initializes a context generator for matchers derived from the specified + * {@code query} -- which might be {@code null} and result in + * {@link #isEmpty()} returning {@code true}. * @param query the query to generate the result for - * @param queryStrings map from field names to queries against the fields + * @param qbuilder required builder used to create {@code query} */ - public Context(Query query, Map queryStrings) { + public Context(Query query, QueryBuilder qbuilder) { + if (qbuilder == null) { + throw new IllegalArgumentException("qbuilder is null"); + } + + this.query = query; + this.qbuilder = qbuilder; QueryMatchers qm = new QueryMatchers(); - m = qm.getMatchers(query, tokenFields); + m = qm.getMatchers(query, TOKEN_FIELDS); if (m != null) { - buildQueryAsURI(queryStrings); + buildQueryAsURI(qbuilder.getQueries()); //System.err.println("Found Matchers = "+ m.length + " for " + query); buffer = new char[MAXFILEREAD]; tokens = new PlainLineTokenizer((Reader) null); } } + /** + * Toggles the alternating value (initially {@code true}). 
+ */ + public void toggleAlt() { + alt = !alt; + } + public boolean isEmpty() { return m == null; } + /** + * Look for context for this instance's initialized query in a search result + * {@link Document}, and output according to the parameters. + * @param env required environment + * @param searcher required search that produced the document + * @param docId document ID for producing context + * @param dest required target to write + * @param urlPrefix prefix for links + * @param morePrefix optional link to more... page + * @param limit a value indicating if the number of matching lines should be + * limited. N.b. unlike + * {@link #getContext(java.io.Reader, java.io.Writer, java.lang.String, java.lang.String, java.lang.String, org.opensolaris.opengrok.analysis.Definitions, boolean, boolean, java.util.List, org.opensolaris.opengrok.analysis.Scopes)}, + * the {@code limit} argument will not be interpreted w.r.t. + * {@link RuntimeEnvironment#isQuickContextScan()}. + * @param tabSize optional positive tab size that must accord with the value + * used when indexing + * @return Did it get any matching context? 
+ */ + public boolean getContext2(RuntimeEnvironment env, IndexSearcher searcher, + int docId, Appendable dest, String urlPrefix, String morePrefix, + boolean limit, int tabSize) { + + if (isEmpty()) { + return false; + } + + Document doc; + try { + doc = searcher.doc(docId); + } catch (IOException e) { + LOGGER.log(Level.WARNING, "ERROR getting searcher doc(int)", e); + return false; + } + + Definitions tags = null; + try { + IndexableField tagsField = doc.getField(QueryBuilder.TAGS); + if (tagsField != null) { + tags = Definitions.deserialize(tagsField.binaryValue().bytes); + } + } catch (ClassNotFoundException|IOException e) { + LOGGER.log(Level.WARNING, "ERROR Definitions.deserialize(...)", e); + return false; + } + + Scopes scopes; + try { + IndexableField scopesField = doc.getField(QueryBuilder.SCOPES); + if (scopesField != null) { + scopes = Scopes.deserialize(scopesField.binaryValue().bytes); + } else { + scopes = new Scopes(); + } + } catch (ClassNotFoundException|IOException e) { + LOGGER.log(Level.WARNING, "ERROR Scopes.deserialize(...)", e); + return false; + } + + /* + * UnifiedHighlighter demands an analyzer "even if in some + * circumstances it isn't used"; here it is not meant to be used. + */ + PlainAnalyzerFactory fac = PlainAnalyzerFactory.DEFAULT_INSTANCE; + FileAnalyzer anz = fac.getAnalyzer(); + + String path = doc.get(QueryBuilder.PATH); + String pathE = Util.URIEncodePath(path); + String urlPrefixE = urlPrefix == null ? "" : Util.URIEncodePath( + urlPrefix); + String moreURL = morePrefix == null ? null : Util.URIEncodePath( + morePrefix) + pathE + "?" + queryAsURI; + + ContextArgs args = new ContextArgs(env.getContextSurround(), + env.getContextLimit()); + /** + * Lucene adds to the following value in FieldHighlighter, so avoid + * integer overflow by not using Integer.MAX_VALUE -- Short is good + * enough. + */ + int linelimit = limit ? 
args.getContextLimit() : Short.MAX_VALUE; + + ContextFormatter formatter = new ContextFormatter(args); + formatter.setUrl(urlPrefixE + pathE); + formatter.setDefs(tags); + formatter.setScopes(scopes); + formatter.setMoreUrl(moreURL); + formatter.setMoreLimit(linelimit); + + OGKUnifiedHighlighter uhi = new OGKUnifiedHighlighter(env, + searcher, anz); + uhi.setBreakIterator(() -> new StrictLineBreakIterator()); + uhi.setFormatter(formatter); + uhi.setTabSize(tabSize); + + try { + List fieldList = qbuilder.getContextFields(); + String[] fields = fieldList.toArray(new String[fieldList.size()]); + + String res = uhi.highlightFieldsUnion(fields, query, docId, + linelimit); + if (res != null) { + dest.append(res); + return true; + } + } catch (IOException e) { + LOGGER.log(Level.WARNING, "ERROR highlightFieldsUnion(...)", e); + // Continue below. + } catch (Throwable e) { + LOGGER.log(Level.SEVERE, "ERROR highlightFieldsUnion(...)", e); + throw e; + } + return false; + } + /** * Build the {@code queryAsURI} string that holds the query in a form * that's suitable for sending it as part of a URI. @@ -107,7 +245,7 @@ private void buildQueryAsURI(Map subqueries) { for (Map.Entry entry : subqueries.entrySet()) { String field = entry.getKey(); String queryText = entry.getValue(); - if ("full".equals(field)) { + if (QueryBuilder.FULL.equals(field)) { field = "q"; // bah - search query params should be consistent! 
} sb.append(field).append("=").append(Util.URIEncode(queryText)) @@ -139,7 +277,6 @@ public boolean getContext(Reader in, Writer out, String urlPrefix, public boolean getContext(Reader in, Writer out, String urlPrefix, String morePrefix, String path, Definitions tags, boolean limit, boolean isDefSearch, List hits, Scopes scopes) { - alt = !alt; if (m == null) { IOUtils.close(in); return false; @@ -233,7 +370,8 @@ public boolean getContext(Reader in, Writer out, String urlPrefix, boolean truncated = false; boolean lim = limit; - if (!RuntimeEnvironment.getInstance().isQuickContextScan()) { + RuntimeEnvironment env = RuntimeEnvironment.getInstance(); + if (!env.isQuickContextScan()) { lim = false; } @@ -273,11 +411,13 @@ public boolean getContext(Reader in, Writer out, String urlPrefix, tokens.setFilename(path); } + int limit_max_lines = env.getContextLimit(); try { String token; - int matchState = LineMatcher.NOT_MATCHED; + int matchState; int matchedLines = 0; - while ((token = tokens.yylex()) != null && (!lim || matchedLines < 10)) { + while ((token = tokens.yylex()) != null && (!lim || + matchedLines < limit_max_lines)) { for (int i = 0; i < m.length; i++) { matchState = m[i].match(token); if (matchState == LineMatcher.MATCHED) { @@ -298,7 +438,8 @@ public boolean getContext(Reader in, Writer out, String urlPrefix, } anything = matchedLines > 0; tokens.dumpRest(); - if (lim && (truncated || matchedLines == 10) && out != null) { + if (lim && (truncated || matchedLines == limit_max_lines) && + out != null) { out.write("[all...]"); } } catch (IOException e) { diff --git a/src/org/opensolaris/opengrok/search/context/ContextArgs.java b/src/org/opensolaris/opengrok/search/context/ContextArgs.java new file mode 100644 index 00000000000..52e4129b320 --- /dev/null +++ b/src/org/opensolaris/opengrok/search/context/ContextArgs.java @@ -0,0 +1,98 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and 
Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * See LICENSE.txt included in this distribution for the specific + * language governing permissions and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at LICENSE.txt. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright (c) 2018, Chris Fraire . + */ + +package org.opensolaris.opengrok.search.context; + +/** + * Represents an immutable settings instance for coordinating objects related + * to producing context presentations. + */ +public class ContextArgs { + /** Not Lucene-related, so {@code int} does fine. */ + private static final int CONTEXT_WIDTH = 100; + + /** Lucene uhighlight-related, so {@code short} is safer. */ + private final short contextSurround; + + /** Lucene uhighlight-related, so {@code short} is safer. */ + private final short contextLimit; + + /** + * Initializes an instance with the specified values. + *

+ * {@code short} is used because some Lucene classes were found to choke + * when OpenGrok used {@link Integer#MAX_VALUE} to mean "unbounded". + * {@code short} is safer therefore but unfortunately somewhat syntactically + * inconvenient. + * @param contextSurround a non-negative value + * @param contextLimit a positive value + */ + public ContextArgs(short contextSurround, short contextLimit) { + if (contextSurround < 0) { + throw new IllegalArgumentException( + "contextSurround cannot be negative"); + } + if (contextLimit < 1) { + throw new IllegalArgumentException( + "contextLimit must be positive"); + } + this.contextSurround = contextSurround; + this.contextLimit = contextLimit; + } + + /** + * Gets the number of lines of leading and trailing context surrounding each + * match line to present. + *

+ * (N.b. the value is used w.r.t. {@link #getContextLimit()} and therefore + * w.r.t. Lucene {@code uhighlight}, and {@code short} is safer, though + * syntactically inconvenient, to avoid numeric overlow that may occur with + * {@code int} in that library.) + * @return a non-negative value + */ + public short getContextSurround() { + return contextSurround; + } + + /** + * Gets the maximum number of lines to present, after which a "more..." link + * is displayed to allow the user to view full match results. + *

+ * (N.b. the value is used with Lucene {@code uhighlight}, and {@code short} + * is safer, though syntactically inconvenient, to avoid numeric overlow + * that may occur with {@code int} in that library.) + * @return a positive value + */ + public short getContextLimit() { + return contextLimit; + } + + /** + * Gets a value indicating the maximum width to show for lines in a context + * presentation. + * @return a positive value + */ + public int getContextWidth() { + return CONTEXT_WIDTH; + } +} diff --git a/src/org/opensolaris/opengrok/search/context/ContextFormatter.java b/src/org/opensolaris/opengrok/search/context/ContextFormatter.java new file mode 100644 index 00000000000..345efb3a0cc --- /dev/null +++ b/src/org/opensolaris/opengrok/search/context/ContextFormatter.java @@ -0,0 +1,406 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * See LICENSE.txt included in this distribution for the specific + * language governing permissions and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at LICENSE.txt. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright (c) 2018, Chris Fraire . 
+ */ + +package org.opensolaris.opengrok.search.context; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.List; +import java.util.SortedMap; +import java.util.logging.Level; +import java.util.logging.Logger; +import java.util.regex.Matcher; +import java.util.regex.Pattern; +import org.apache.lucene.search.uhighlight.Passage; +import org.apache.lucene.search.uhighlight.PassageFormatter; +import org.opensolaris.opengrok.analysis.Definitions; +import org.opensolaris.opengrok.analysis.Definitions.Tag; +import org.opensolaris.opengrok.analysis.Scopes; +import org.opensolaris.opengrok.logger.LoggerFactory; +import org.opensolaris.opengrok.util.SourceSplitter; +import org.opensolaris.opengrok.util.StringUtils; +import org.opensolaris.opengrok.web.HtmlConsts; +import org.opensolaris.opengrok.web.Util; + +/** + * Represents a subclass of {@link PassageFormatter} that uses + * {@link PassageConverter}. + */ +public class ContextFormatter extends PassageFormatter { + + private static final String MORE_LABEL = "[all " + HtmlConsts.HELLIP + "]"; + + private static final Logger LOGGER = LoggerFactory.getLogger( + ContextFormatter.class); + + /** + * Matches a non-word character: + *

+     * {@code
+     * (?U)\W
+     * }
+     * 
+ * (Edit above and paste below [in NetBeans] for easy String escaping.) + */ + private static final Pattern NONWORD_CHAR = Pattern.compile("(?U)\\W"); + + private final PassageConverter cvt; + private final List marks = new ArrayList<>(); + private String url; + private Definitions defs; + private Scopes scopes; + + /** + * An optional URL for linking when the {@link #moreLimit} (if positive) is + * reached. + */ + private String moreUrl; + private int moreLimit; + + /** + * Cached splitter, keyed by {@link #originalText} + */ + private SourceSplitter splitter; + private String originalText; + + /** + * Initializes a formatter for the specified arguments. + * @param args required instance + */ + public ContextFormatter(ContextArgs args) { + this.cvt = new PassageConverter(args); + } + + /** + * Gets the initialized value. + * @return a defined instance + */ + public ContextArgs getArgs() { + return cvt.getArgs(); + } + + /** + * Gets the required URL to use for linking lines. + * @return the URL or {@code null} + */ + public String getUrl() { + return url; + } + + /** + * Sets the required URL to use for linking lines. + * @param value the URL to use + */ + public void setUrl(String value) { + this.url = value; + } + + /** + * Gets the optional URL to use if {@link #getMoreLimit()} is reached. + * @return the URL or {@code null} + */ + public String getMoreUrl() { + return moreUrl; + } + + /** + * Sets the optional URL to use if {@link #getMoreLimit()} is reached. + * @param value the URL to use + */ + public void setMoreUrl(String value) { + this.moreUrl = value; + } + + /** + * Gets the optional line limit to specify (if positive) a maximum number + * of lines to format and -- if {@link #getMoreUrl()} is defined -- a "more" + * link to display. Default is zero (i.e. inactive). 
+ * @return the line limit value + */ + public int getMoreLimit() { + return moreLimit; + } + + /** + * Sets the optional line limit to specify (if positive) a maximum number + * of lines to format and -- if {@link #getMoreUrl()} is defined -- a "more" + * link to display. + * @param value the line limit + */ + public void setMoreLimit(int value) { + if (value < 0) { + throw new IllegalArgumentException("value is negative"); + } + this.moreLimit = value; + } + + /** + * Gets the optional definitions. + * @return the defs + */ + public Definitions getDefs() { + return defs; + } + + /** + * Sets the optional definitions. + * @param value + */ + public void setDefs(Definitions value) { + this.defs = value; + } + + /** + * Gets the optional scopes to use. + * @return the scopes + */ + public Scopes getScopes() { + return scopes; + } + + /** + * Sets the optional scopes to use. + * @param value + */ + public void setScopes(Scopes value) { + this.scopes = value; + } + + /** + * Splits {@code originalText} using {@link SourceSplitter}, converts + * passages using {@link PassageConverter}, and formats for presentation in + * OpenGrok UI using the instance's properties (e.g., {@link #getUrl()} and + * {@link #getDefs()}). 
+ * @param passages a required instance + * @param originalText a required instance + * @return a defined {@link FormattedLines} instance, which might be empty + * @throws IllegalStateException if {@link #getUrl()} is null + */ + @Override + public Object format(Passage[] passages, String originalText) { + String lineUrl = url; + if (lineUrl == null) { + throw new IllegalStateException("Url property is null"); + } + + if (this.originalText == null || !this.originalText.equals( + originalText)) { + splitter = new SourceSplitter(); + splitter.reset(originalText); + this.originalText = originalText; + } + + FormattedLines res = new FormattedLines(); + StringBuilder bld = new StringBuilder(); + SortedMap lines = cvt.convert(passages, + splitter); + int numl = 0; + boolean limited = false; + for (LineHighlight lhi : lines.values()) { + ++numl; + if (moreLimit > 0 && numl > moreLimit) { + limited = true; + break; + } + + String line = splitter.getLine(lhi.getLineno()); + Matcher eofmatcher = StringUtils.STANDARD_EOL.matcher(line); + if (eofmatcher.find()) { + line = line.substring(0, eofmatcher.start()); + } + + try { + marks.clear(); + startLine(bld, lineUrl, lhi.getLineno()); + int loff = 0; + int hioff = 0; + boolean didBold = false; + while (loff < line.length()) { + if (hioff >= lhi.countMarkups() || + lhi.getMarkup(hioff).getLineStart() >= + line.length()) { + // If there are no more markups, use all remaining. + lhi.hsub(bld, line, loff); + loff = line.length(); + } else { + PhraseHighlight phi = lhi.getMarkup(hioff++); + + /* + * If the highlight is a sub-string wholly within the + * line, add it to the `marks' list. 
+ */ + if (phi.getLineStart() >= 0 && phi.getLineEnd() >= 0 && + phi.getLineEnd() <= line.length()) { + marks.add(line.substring(phi.getLineStart(), + phi.getLineEnd())); + } + + if (phi.getLineStart() < 0) { + if (!didBold) { + bld.append(HtmlConsts.B); + } + if (phi.getLineEnd() != Integer.MAX_VALUE) { + lhi.hsub(bld, line, loff, phi.getLineEnd()); + loff += phi.getLineEnd() - loff; + } else { + lhi.hsub(bld, line, loff); + loff = line.length(); + } + bld.append(HtmlConsts.ZB); + didBold = false; + } else { + lhi.hsub(bld, line, loff, phi.getLineStart()); + loff += phi.getLineStart() - loff; + if (!didBold) { + bld.append(HtmlConsts.B); + didBold = true; + } + } + if (phi.getLineEnd() == Integer.MAX_VALUE) { + lhi.hsub(bld, line, loff); + loff = line.length(); + } else { + lhi.hsub(bld, line, loff, phi.getLineEnd()); + loff += phi.getLineEnd() - loff; + if (didBold) { + bld.append(HtmlConsts.ZB); + didBold = false; + } + } + } + } + + if (didBold) { + bld.append(HtmlConsts.ZB); + // no need to unset didBold + } + finishLine(bld, lhi.getLineno(), marks); + // regardless of true EOF, write a
+ bld.append(HtmlConsts.BR); + /** + * Appending a LF here would hurt the more.jsp view, while + * search.jsp (where getContext() does it) is indifferent -- so + * skip it. + */ + res.put(lhi.getLineno(), bld.toString()); + bld.setLength(0); + } catch (IOException e) { + LOGGER.log(Level.SEVERE, "Could not format()", e); + return res; + } + } + + res.setLimited(limited); + if (moreUrl != null) { + bld.append(""); + bld.append(MORE_LABEL); + bld.append(""); + bld.append(HtmlConsts.BR); + bld.append("\n"); + res.setFooter(bld.toString()); + bld.setLength(0); + } + return res; + } + + private void startLine(Appendable dest, String lineUrl, int lineOffset) + throws IOException { + dest.append(""); + dest.append(num); + dest.append(" "); + } + + private void finishLine(Appendable dest, int lineOffset, List marks) + throws IOException { + dest.append(""); + writeScope(lineOffset, dest); + writeTag(lineOffset, dest, marks); + } + + private void writeScope(int lineOffset, Appendable dest) + throws IOException { + Scopes.Scope scope = null; + if (scopes != null) { + // N.b. use ctags 1-offset vs 0-offset. + scope = scopes.getScope(lineOffset + 1); + } + if (scope != null && scope != scopes.getScope(-1)) { + dest.append(" in "); + Util.htmlize(scope.getName(), dest); + dest.append("()"); + } + } + + private void writeTag(int lineOffset, Appendable dest, List marks) + throws IOException { + if (defs != null) { + // N.b. use ctags 1-offset vs 0-offset. + List linetags = defs.getTags(lineOffset + 1); + if (linetags != null) { + Tag pickedTag = findTagForMark(linetags, marks); + if (pickedTag != null) { + dest.append(" "); + Util.htmlize(pickedTag.type, dest); + dest.append(""); + } + } + } + } + + /** + * Search the cross product of {@code linetags} and {@code marks} for any + * mark that starts with a {@link Tag#symbol} and where any subsequent + * character is a non-word ({@code (?U)\W}) character. 
+ * @return a defined instance or {@code null} + */ + private Tag findTagForMark(List linetags, List marks) { + for (Tag tag : linetags) { + if (tag.type != null) { + for (String mark : marks) { + if (mark.startsWith(tag.symbol) && (mark.length() == + tag.symbol.length() || isNonWord( + mark.charAt(tag.symbol.length())))) { + return tag; + } + } + } + } + return null; + } + + private static boolean isNonWord(char c) { + String cword = String.valueOf(c); + return NONWORD_CHAR.matcher(cword).matches(); + } +} diff --git a/src/org/opensolaris/opengrok/search/context/FormattedLines.java b/src/org/opensolaris/opengrok/search/context/FormattedLines.java new file mode 100644 index 00000000000..820855e8f7e --- /dev/null +++ b/src/org/opensolaris/opengrok/search/context/FormattedLines.java @@ -0,0 +1,136 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * See LICENSE.txt included in this distribution for the specific + * language governing permissions and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at LICENSE.txt. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright (c) 2018, Chris Fraire . + */ + +package org.opensolaris.opengrok.search.context; + +import java.util.Map; +import java.util.NoSuchElementException; +import java.util.SortedMap; +import java.util.TreeMap; +import org.apache.lucene.search.uhighlight.UnifiedHighlighter; + +/** + * Represents structured results from {@link ContextFormatter} that can be + * merged with other instances. + *

+ * {@link UnifiedHighlighter} returns results separated by field, and + * {@link OGKUnifiedHighlighter} merges them together to return a coherent + * result for presentation. + */ +public class FormattedLines { + private final SortedMap lines = new TreeMap<>(); + private String footer; + private boolean limited; + + /** + * Gets a count of the number of lines in the instance. + */ + public int getCount() { + return lines.size(); + } + + /** + * @return the footer + */ + public String getFooter() { + return footer; + } + + public void setFooter(String value) { + footer = value; + } + + /** + * Gets a value indicating if lines were limited. + */ + public boolean isLimited() { + return limited; + } + + /** + * Sets a value indicating if lines were limited. + */ + public void setLimited(boolean value) { + limited = value; + } + + /** + * Removes the highest line from the instance. + * @return a defined value + * @throws NoSuchElementException if the instance is empty + */ + public String pop() { + return lines.remove(lines.lastKey()); + } + + /** + * Sets the specified String line for the specified line number, replacing + * any previous entry for the same line number. + * @param lineno a value + * @param line a defined instance + * @return the former value or {@code null} + */ + public String put(int lineno, String line) { + if (line == null) { + throw new IllegalArgumentException("line is null"); + } + return lines.put(lineno, line); + } + + /** + * Creates a new instance with lines merged from this instance and + * {@code other}. Any lines in common for the same line number are taken + * from this instance rather than {@code other}; and likewise for + * {@link #getFooter()}. + *

+ * {@link #isLimited()} will be {@code true} if either is {@code true}, but + * the value is suspect since it cannot be truly known if the merged result + * is actually the unlimited result. + * @param other a defined instance + * @return a defined instance + */ + public FormattedLines merge(FormattedLines other) { + FormattedLines res = new FormattedLines(); + res.lines.putAll(this.lines); + for (Map.Entry kv : other.lines.entrySet()) { + res.lines.putIfAbsent(kv.getKey(), kv.getValue()); + } + + res.setFooter(this.footer != null ? this.footer : other.footer); + res.setLimited(this.limited || other.limited); + return res; + } + + @Override + public String toString() { + StringBuilder bld = new StringBuilder(); + for (String line : lines.values()) { + bld.append(line); + } + String f = footer; + if (f != null && limited) { + bld.append(f); + } + return bld.toString(); + } +} diff --git a/src/org/opensolaris/opengrok/search/context/LineHighlight.java b/src/org/opensolaris/opengrok/search/context/LineHighlight.java new file mode 100644 index 00000000000..41910fb560d --- /dev/null +++ b/src/org/opensolaris/opengrok/search/context/LineHighlight.java @@ -0,0 +1,207 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * See LICENSE.txt included in this distribution for the specific + * language governing permissions and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at LICENSE.txt. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright (c) 2018, Chris Fraire . 
+ */ + +package org.opensolaris.opengrok.search.context; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.List; +import org.opensolaris.opengrok.web.HtmlConsts; +import org.opensolaris.opengrok.web.Util; + +/** + * Represents a collection of metadata related to highlighting a single line + * of code. + */ +public class LineHighlight { + + private final int lineno; + private List markups; + /** Offset of elided left part */ + private int lelide; + /** Offset of elide right part */ + private int relide; + + private boolean didLelide; + private boolean didRelide; + + public LineHighlight(int lineno) { + if (lineno < 0) { + throw new IllegalArgumentException("lineno cannot be negative"); + } + this.lineno = lineno; + } + + /** + * Gets the number of markups + * @return zero or greater + */ + public int countMarkups() { + return markups == null ? 0 : markups.size(); + } + + /** + * Gets the highlight at the specified position. + * @param i index of element to return + * @return defined instance + */ + public PhraseHighlight getMarkup(int i) { + return markups.get(i); + } + + /** + * Sort and condense overlapping markup highlights. + */ + public void condenseMarkups() { + if (markups == null) { + return; + } + + markups.sort(PhraseHighlightComparator.INSTANCE); + // Condense instances if there is overlap. + for (int i = 0; i + 1 < markups.size(); ++i) { + PhraseHighlight phi0 = markups.get(i); + PhraseHighlight phi1 = markups.get(i + 1); + if (phi0.overlaps(phi1)) { + phi0 = phi0.merge(phi1); + markups.set(i, phi0); + markups.remove(i + 1); + --i; + } + } + } + + /** + * Adds the specified highlight. 
+ * @param phi a defined instance + */ + public void addMarkup(PhraseHighlight phi) { + if (phi == null) { + throw new IllegalArgumentException("phi is null"); + } + if (markups == null) { + markups = new ArrayList<>(); + } + markups.add(phi); + } + + /** + * @return the lineno + */ + public int getLineno() { + return lineno; + } + + /** + * Gets the left elide value. + * @return zero or greater + */ + public int getLelide() { + return lelide; + } + + /** + * Sets the left elide value. + */ + public void setLelide(int value) { + if (value < 0) { + throw new IllegalArgumentException("value is negative"); + } + this.lelide = value; + } + + /** + * Gets the right elide value. + * @return zero or greater + */ + public int getRelide() { + return relide; + } + + /** + * Sets the right elide value. + */ + public void setRelide(int value) { + if (value < 0) { + throw new IllegalArgumentException("value is negative"); + } + this.relide = value; + } + + /** + * Append a substring with + * {@link Util#htmlize(java.lang.CharSequence, java.lang.Appendable, boolean)}, + * taking into account any positive {@link #getLelide()} or + * {@link #getRelide()}. 
+ * @param dest + * @param line + * @param start + * @param end + * @throws IOException + */ + public void hsub(Appendable dest, String line, int start, int end) + throws IOException { + boolean lell = false; + boolean rell = false; + if (start < lelide) { + lell = true; + start = lelide; + } + if (end < lelide) { + end = lelide; + } + if (relide > 0) { + if (start > relide) { + start = relide; + } + if (end > relide) { + rell = true; + end = relide; + } + } + String str = line.substring(start, end); + if (lell && !didLelide) { + dest.append(HtmlConsts.HELLIP); + didLelide = true; + } + Util.htmlize(str, dest, true); + if (rell && !didRelide) { + dest.append(HtmlConsts.HELLIP); + didRelide = true; + } + } + + /** + * Calls {@link #hsub(java.lang.Appendable, java.lang.String, int, int)} + * with {@code dest}, {@code line}, {@code loff}, and {@code line} + * {@link String#length()}. + * @param dest + * @param line + * @param loff + * @throws IOException + */ + public void hsub(Appendable dest, String line, int loff) + throws IOException { + hsub(dest, line, loff, line.length()); + } +} diff --git a/src/org/opensolaris/opengrok/search/context/OGKUnifiedHighlighter.java b/src/org/opensolaris/opengrok/search/context/OGKUnifiedHighlighter.java new file mode 100644 index 00000000000..b99fc5b2122 --- /dev/null +++ b/src/org/opensolaris/opengrok/search/context/OGKUnifiedHighlighter.java @@ -0,0 +1,331 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + * Portions Copyright (c) 2018, Chris Fraire . + */ + +package org.opensolaris.opengrok.search.context; + +import java.io.BufferedReader; +import java.io.File; +import java.io.IOException; +import java.io.InputStream; +import java.io.Reader; +import java.nio.charset.StandardCharsets; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.List; +import java.util.Map; +import java.util.logging.Level; +import java.util.logging.Logger; +import org.apache.lucene.analysis.Analyzer; +import org.apache.lucene.document.DateTools; +import org.apache.lucene.document.Document; +import org.apache.lucene.search.DocIdSetIterator; +import org.apache.lucene.search.IndexSearcher; +import org.apache.lucene.search.Query; +import org.apache.lucene.search.uhighlight.PhraseHelper; +import org.apache.lucene.search.uhighlight.UnifiedHighlighter; +import org.apache.lucene.util.BytesRef; +import org.apache.lucene.util.automaton.CharacterRunAutomaton; +import org.opensolaris.opengrok.analysis.AnalyzerGuru; +import org.opensolaris.opengrok.analysis.ExpandTabsReader; +import org.opensolaris.opengrok.analysis.StreamSource; +import org.opensolaris.opengrok.configuration.RuntimeEnvironment; +import org.opensolaris.opengrok.logger.LoggerFactory; +import org.opensolaris.opengrok.search.QueryBuilder; +import org.opensolaris.opengrok.util.IOUtils; +import org.opensolaris.opengrok.web.Util; + +/** + * Represents a subclass of {@link UnifiedHighlighter} with customizations for + * OpenGrok. 
+ */ +public class OGKUnifiedHighlighter extends UnifiedHighlighter { + + private static final Logger LOGGER = LoggerFactory.getLogger( + OGKUnifiedHighlighter.class); + + private final RuntimeEnvironment env; + + private int tabSize; + + private String fileTypeName; + + /** + * Initializes an instance with + * {@link UnifiedHighlighter#UnifiedHighlighter(org.apache.lucene.search.IndexSearcher, org.apache.lucene.analysis.Analyzer)} + * for the specified {@code indexSearcher} and {@code indexAnalyzer}, and + * stores the {@code env} for later use. + * @param env a required instance + * @param indexSearcher a required instance + * @param indexAnalyzer a required instance + * @throws IllegalArgumentException if any argument is null + */ + public OGKUnifiedHighlighter(RuntimeEnvironment env, + IndexSearcher indexSearcher, Analyzer indexAnalyzer) { + super(indexSearcher, indexAnalyzer); + + if (env == null) { + throw new IllegalArgumentException("env is null"); + } + this.env = env; + } + + /** + * Gets a file type name-specific analyzer during the execution of + * {@link #highlightFieldsUnion(java.lang.String[], org.apache.lucene.search.Query, int, int)}, + * or just gets the object passed in to the constructor at all other times. + * @return a defined instance + */ + @Override + public Analyzer getIndexAnalyzer() { + String ftname = fileTypeName; + if (ftname == null) { + return indexAnalyzer; + } + Analyzer fa = AnalyzerGuru.getAnalyzer(ftname); + return fa == null ? indexAnalyzer : fa; + } + + public int getTabSize() { + return tabSize; + } + + public void setTabSize(int value) { + this.tabSize = value; + } + + /** + * Transiently arranges that {@link #getIndexAnalyzer()} returns a file type + * name-specific analyzer during a subsequent call of + * {@link #highlightFieldsUnionWork(java.lang.String[], org.apache.lucene.search.Query, int, int)}. 
+ * @param fields a defined instance + * @param query a defined instance + * @param docId a valid document ID + * @param lineLimit the maximum number of lines to return + * @return a defined instance or else {@code null} if there are no results + * @throws IOException if accessing the Lucene document fails + */ + public String highlightFieldsUnion(String[] fields, Query query, + int docId, int lineLimit) throws IOException { + /** + * Setting fileTypeName has to happen before getFieldHighlighter() is + * called by highlightFieldsAsObjects() so that the result of + * getIndexAnalyzer() (if it is called due to requiring ANALYSIS) can be + * influenced by fileTypeName. + */ + Document doc = searcher.doc(docId); + fileTypeName = doc == null ? null : doc.get(QueryBuilder.TYPE); + try { + return highlightFieldsUnionWork(fields, query, docId, lineLimit); + } finally { + fileTypeName = null; + } + } + + /** + * Calls + * {@link #highlightFieldsAsObjects(java.lang.String[], org.apache.lucene.search.Query, int[], int[])}, + * and merges multiple passages if the formatter returns + * {@link FormattedLines} or else returns the first formatted result. + * @param fields a defined instance + * @param query a defined instance + * @param docId a valid document ID + * @param lineLimit the maximum number of lines to return + * @return a defined instance or else {@code null} if there are no results + * @throws IOException if accessing the Lucene document fails + */ + protected String highlightFieldsUnionWork(String[] fields, Query query, + int docId, int lineLimit) throws IOException { + int[] maxPassagesCopy = new int[fields.length]; + /** + * N.b. linelimit + 1 so that the ContextFormatter has an indication + * when to display the "more..." link. 
+ */ + Arrays.fill(maxPassagesCopy, lineLimit + 1); + + FormattedLines res = null; + Map mappedRes = highlightFieldsAsObjects(fields, + query, new int[]{docId}, maxPassagesCopy); + for (Object[] flinesz : mappedRes.values()) { + for (Object obj : flinesz) { + /** + * Empirical testing showed that the passage could be null if + * the original source text is not available to the highlighter. + */ + if (obj != null) { + if (!(obj instanceof FormattedLines)) { + return obj.toString(); + } + FormattedLines flines = (FormattedLines)obj; + res = res == null ? flines : res.merge(flines); + } + } + } + if (res == null) { + return null; + } + if (res.getCount() > lineLimit) { + res.setLimited(true); + while (res.getCount() > lineLimit) { + res.pop(); + } + } + return res.toString(); + } + + /** + * Produces original text by reading from OpenGrok source content relative + * to {@link RuntimeEnvironment#getSourceRootPath()} and returns the content + * for each document if the timestamp matches -- or else just {@code null} + * for a missing file or a timestamp mismatch (as "the returned Strings must + * be identical to what was indexed.") + *

+ * "This method must load fields for at least one document from the given + * {@link DocIdSetIterator} but need not return all of them; by default the + * character lengths are summed and this method will return early when + * {@code cacheCharsThreshold} is exceeded. Specifically if that number is + * 0, then only one document is fetched no matter what. Values in the array + * of {@link CharSequence} will be {@code null} if no value was found." + * @return a defined instance + * @throws IOException if an I/O error occurs + */ + @Override + protected List loadFieldValues(String[] fields, + DocIdSetIterator docIter, int cacheCharsThreshold) throws IOException { + + List docListOfFields = new ArrayList<>( + cacheCharsThreshold == 0 ? 1 : (int) Math.min(64, docIter.cost())); + + int sumChars = 0; + do { + int docId = docIter.nextDoc(); + if (docId == DocIdSetIterator.NO_MORE_DOCS) { + break; + } + Document doc = searcher.doc(docId); + + String path = doc.get(QueryBuilder.PATH); + String storedU = doc.get(QueryBuilder.U); + String content = getRepoFileContent(path, storedU); + + CharSequence[] seqs = new CharSequence[fields.length]; + Arrays.fill(seqs, content); + docListOfFields.add(seqs); + + if (content != null) { + sumChars += content.length(); + } + } while (sumChars <= cacheCharsThreshold && cacheCharsThreshold != 0); + + return docListOfFields; + } + + /** + * Returns the value from the {@code super} implementation, with logging for + * ANALYSIS of any field but {@link QueryBuilder#FULL} or + * {@link QueryBuilder#REFS}. 
+ * @return the value from the {@code super} implementation + */ + @Override + protected OffsetSource getOptimizedOffsetSource(String field, + BytesRef[] terms, PhraseHelper phraseHelper, + CharacterRunAutomaton[] automata) { + + OffsetSource res = super.getOptimizedOffsetSource(field, + terms, phraseHelper, automata); + if (res == OffsetSource.ANALYSIS) { + /** + * Testing showed that UnifiedHighlighter falls back to + * ANALYSIS in the presence of multi-term queries (MTQs) such as + * prefixes and wildcards even for fields that are analyzed with + * POSTINGS -- i.e. with DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS. + * This is despite UnifiedHighlighter seeming to indicate that + * postings should be sufficient in the comment for + * shouldHandleMultiTermQuery(String): "MTQ highlighting can be + * expensive, particularly when using offsets in postings." + * DEFS are stored with term vectors to avoid this problem, + * since re-analysis would not at all accord with ctags Definitions. + * For FULL and REFS, highlightFieldsUnion() arranges that + * getIndexAnalyzer() can return a TYPE-specific analyzer for use by + * getOffsetStrategy() -- if re-ANALYSIS is required. + */ + switch (field) { + case QueryBuilder.FULL: + case QueryBuilder.REFS: + // Acceptable -- as described above. 
+ break; + default: + if (LOGGER.isLoggable(Level.FINE)) { + OffsetSource defaultRes = getOffsetSource(field); + LOGGER.log(Level.FINE, "Field {0} using {1} vs {2}", + new Object[]{field, res, defaultRes}); + } + break; + } + } + return res; + } + + private String getRepoFileContent(String repoRelPath, String storedU) + throws IOException { + + if (storedU == null) { + LOGGER.log(Level.FINE, "Missing U[UID] for: {0}", + repoRelPath); + return null; + } + + String repoAbsPath = env.getSourceRootPath() + repoRelPath; + File repoAbsFile = new File(repoAbsPath); + if (!repoAbsFile.exists()) { + LOGGER.log(Level.FINE, "Missing file: {0}", repoAbsPath); + return null; + } + + // Verify that timestamp (U) is unchanged by comparing UID. + String uid = Util.path2uid(repoRelPath, + DateTools.timeToString(repoAbsFile.lastModified(), + DateTools.Resolution.MILLISECOND)); + BytesRef buid = new BytesRef(uid); + BytesRef storedBuid = new BytesRef(storedU); + if (storedBuid.compareTo(buid) != 0) { + LOGGER.log(Level.FINE, "Last-modified differs for: {0}", + repoRelPath); + return null; + } + + StringBuilder bld = new StringBuilder(); + StreamSource src = StreamSource.fromFile(repoAbsFile); + try (InputStream in = src.getStream(); + Reader rdr = getReader(in)) { + int c; + while ((c = rdr.read()) != -1) { + bld.append((char)c); + } + } + + return bld.toString(); + } + + private Reader getReader(InputStream in) throws IOException { + Reader bsrdr = IOUtils.createBOMStrippedReader(in, + StandardCharsets.UTF_8.name()); + BufferedReader bufrdr = new BufferedReader(bsrdr); + return ExpandTabsReader.wrap(bufrdr, tabSize); + } +} diff --git a/src/org/opensolaris/opengrok/search/context/PassageConverter.java b/src/org/opensolaris/opengrok/search/context/PassageConverter.java new file mode 100644 index 00000000000..20b8e7c50ed --- /dev/null +++ b/src/org/opensolaris/opengrok/search/context/PassageConverter.java @@ -0,0 +1,275 @@ +/* + * CDDL HEADER START + * + * The contents of this file 
are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * See LICENSE.txt included in this distribution for the specific + * language governing permissions and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at LICENSE.txt. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright (c) 2018, Chris Fraire . + */ + +package org.opensolaris.opengrok.search.context; + +import java.util.SortedMap; +import java.util.TreeMap; +import org.apache.lucene.search.uhighlight.Passage; +import org.opensolaris.opengrok.util.SourceSplitter; + +/** + * Represents an object that can translate {@link Passage} instances into + * indexed {@link LineHighlight} instances, taking into account a configurable + * number of leading and trailing lines of context for each match. + */ +public class PassageConverter { + + private final ContextArgs args; + + /** + * Initializes a converter for the specified arguments. + * @param args required instance + */ + public PassageConverter(ContextArgs args) { + if (args == null) { + throw new IllegalArgumentException("args is null"); + } + this.args = args; + } + + /** + * @return the initialized value + */ + public ContextArgs getArgs() { + return args; + } + + /** + * Converts the specified passages into a sorted map of + * {@link LineHighlight} instances keyed by line offsets. 
+ * @param passages a defined instance + * @param splitter a defined instance + * @return a defined instance + */ + public SortedMap convert(Passage[] passages, + SourceSplitter splitter) { + + SortedMap res = new TreeMap<>(); + for (Passage passage : passages) { + int start = passage.getStartOffset(); + int end = passage.getEndOffset(); + if (start >= end) { + continue; + } + + int m = splitter.findLineOffset(start); + if (m < 0) { + continue; + } + int n = splitter.findLineOffset(end - 1); + if (n < 0) { + continue; + } + + m = Math.max(0, m - args.getContextSurround()); + n = Math.min(splitter.count() - 1, n + args.getContextSurround()); + + // Ensure an entry in `res' for every passage line. + for (int i = m; i <= n; ++i) { + if (!res.containsKey(i)) { + res.put(i, new LineHighlight(i)); + } + } + + // Create LineHighlight entries for passage matches. + for (int i = 0; i < passage.getNumMatches(); ++i) { + int mstart = passage.getMatchStarts()[i]; + int mm = splitter.findLineOffset(mstart); + int mend = passage.getMatchEnds()[i]; + int nn = splitter.findLineOffset(mend - 1); + if (mstart < mend && mm >= m && mm <= n && nn >= m && nn <= n) { + if (mm == nn) { + int lbeg = splitter.getPosition(mm); + int lstart = mstart - lbeg; + int lend = mend - lbeg; + LineHighlight lhigh = res.get(mm); + lhigh.addMarkup(PhraseHighlight.create(lstart, lend)); + } else { + int lbeg = splitter.getPosition(mm); + int loff = mstart - lbeg; + LineHighlight lhigh = res.get(mm); + lhigh.addMarkup(PhraseHighlight.createStarter(loff)); + + lbeg = splitter.getPosition(nn); + loff = mend - lbeg; + lhigh = res.get(nn); + lhigh.addMarkup(PhraseHighlight.createEnder(loff)); + + /* + * Designate any intermediate lines as + * wholly-highlighted + */ + for (int j = mm + 1; j <= nn - 1; ++j) { + lhigh = res.get(j); + lhigh.addMarkup(PhraseHighlight.createEntire()); + } + } + } + } + } + + /* + * Condense PhraseHighlight instances within lines, and elide as + * necessary to the reportable 
length. + */ + for (LineHighlight lhi : res.values()) { + lhi.condenseMarkups(); + String line = splitter.getLine(lhi.getLineno()); + elideLine(lhi, line); + } + + return res; + } + + private void elideLine(LineHighlight lhi, String line) { + int excess = line.length() - args.getContextWidth(); + if (excess <= 0) { + return; + } + + // Account for an ellipsis. + int nellip = 1; + ++excess; + + /* + * The search/ view does not show leading whitespace anyway, so elide it + * straight away. + */ + int nwhsp0 = countStartingWhitespace(line); + if (nwhsp0 > 0) { + lhi.setLelide(Math.min(nwhsp0, excess)); + excess -= lhi.getLelide(); + if (excess <= 0) { + return; + } + + // Account for another ellipsis. + ++nellip; + ++excess; + } + + int nwhspz = countEndingWhitespace(line); + // If the end of the line can be elided, just truncate it. + if (lhi.countMarkups() < 1 || + lhi.getMarkup(lhi.countMarkups() - 1).getLineEnd() < + args.getContextWidth() || nwhspz >= excess) { + lhi.setRelide(line.length() - excess); + return; + } + + /* + * Find the width of bounds of markups. 
+ */ + int lbound = -1, rbound = -1; + for (int i = 0; i < lhi.countMarkups(); ++i) { + PhraseHighlight phi = lhi.getMarkup(i); + if (phi.getLineStart() >= 0) { + lbound = phi.getLineStart(); + break; + } else if (phi.getLineStart() < 0) { + lbound = phi.getLineStart(); + break; + } else if (phi.getLineEnd() != Integer.MAX_VALUE) { + lbound = phi.getLineEnd() - 1; + break; + } else if (phi.getLineEnd() == Integer.MAX_VALUE) { + lbound = line.length() - 1; + break; + } + } + for (int i = lhi.countMarkups() - 1; i >= 0; --i) { + PhraseHighlight phi = lhi.getMarkup(i); + if (phi.getLineEnd() != Integer.MAX_VALUE) { + rbound = phi.getLineEnd(); + break; + } else if (phi.getLineEnd() != Integer.MAX_VALUE) { + rbound = line.length(); + break; + } else if (phi.getLineStart() >= 0) { + rbound = phi.getLineStart() + 1; + break; + } else if (phi.getLineStart() < 0) { + rbound = 1; + break; + } + } + // If the markup bounds are separated from the left margin... + if (lbound > 0 && rbound > 0) { + /* + * First use a rough estimate of three-quarters of a context-width + * before the midpoint of lbound and rbound. + */ + int calcLeft = Math.max(0, (lbound + rbound) / 2 - + args.getContextWidth() * 3 / 4 - 1); + // If past the lbound, then snap it left. + if (calcLeft > lbound) { + calcLeft = lbound; + } + if (calcLeft > lhi.getLelide()) { + // Possibly account for another ellipsis. + if (lhi.getLelide() < 1) { + ++nellip; + ++excess; + } + excess -= calcLeft - lhi.getLelide(); + lhi.setLelide(calcLeft); + } + // Continue below. + } + + // Truncate the line finally. + lhi.setRelide(line.length() - excess); + if (nellip > 1) { + /** + * Possibly shift the left elide leftward in case the rough + * estimate above was too far rightward. 
+ */ + lhi.setLelide(lhi.getRelide() - args.getContextWidth() + nellip); + } + } + + private int countStartingWhitespace(String line) { + int n = 0; + for (int i = 0; i < line.length(); ++i) { + char c = line.charAt(i); + if (!Character.isWhitespace(c)) { + break; + } + ++n; + } + return n; + } + + private int countEndingWhitespace(String line) { + int n = 0; + for (int i = line.length() - 1; i >= 0; --i) { + char c = line.charAt(i); + if (!Character.isWhitespace(c)) { + break; + } + ++n; + } + return n; + } +} diff --git a/src/org/opensolaris/opengrok/search/context/PhraseHighlight.java b/src/org/opensolaris/opengrok/search/context/PhraseHighlight.java new file mode 100644 index 00000000000..ecc36e60673 --- /dev/null +++ b/src/org/opensolaris/opengrok/search/context/PhraseHighlight.java @@ -0,0 +1,106 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * See LICENSE.txt included in this distribution for the specific + * language governing permissions and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at LICENSE.txt. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright (c) 2018, Chris Fraire . + */ + +package org.opensolaris.opengrok.search.context; + +/** + * Represents a highlighted phrase within a line -- possibly with bounds + * indicating that the highlight begins or ends on another line. + */ +public class PhraseHighlight { + + /** + * a value that has been translated from start offset w.r.t. document + * start to a value w.r.t. 
line start -- or -1 if not beginning this + * line + */ + private final int lineStart; + /** + * a value that has been translated from start offset w.r.t. document + * start to a value w.r.t. line start -- or {@link Integer#MAX_VALUE} if + * not ending this line + */ + private final int lineEnd; + + public static PhraseHighlight create(int start, int end) { + return new PhraseHighlight(start, end); + } + + public static PhraseHighlight createStarter(int start) { + return new PhraseHighlight(start, Integer.MAX_VALUE); + } + + public static PhraseHighlight createEnder(int end) { + return new PhraseHighlight(-1, end); + } + + public static PhraseHighlight createEntire() { + return new PhraseHighlight(-1, Integer.MAX_VALUE); + } + + /** + * Gets a value that has been translated from start offset w.r.t. document + * start to a value w.r.t. line start -- or -1 if not beginning this + * line. + */ + public int getLineStart() { + return lineStart; + } + + /** + * Gets a value that has been translated from start offset w.r.t. document + * start to a value w.r.t. line start -- or {@link Integer#MAX_VALUE} if + * not ending this line. + */ + public int getLineEnd() { + return lineEnd; + } + + /** + * Determines if the specified {@code other} overlaps with this instance. + * @return {@code true} if the instances overlap + */ + public boolean overlaps(PhraseHighlight other) { + return (lineStart >= other.lineStart && lineStart <= other.lineEnd) || + (other.lineStart >= lineStart && other.lineStart <= lineEnd) || + (lineEnd >= other.lineStart && lineEnd <= other.lineEnd) || + (other.lineEnd >= lineStart && other.lineEnd <= lineEnd); + } + + /** + * Creates a new instance that is the merging of this instance and the + * specified {@code other}. 
+ * @return a defined instance + */ + public PhraseHighlight merge(PhraseHighlight other) { + int mergeStart = Math.min(lineStart, other.lineStart); + int mergeEnd = Math.max(lineEnd, other.lineEnd); + return PhraseHighlight.create(mergeStart, mergeEnd); + } + + /** private to enforce static create() methods */ + private PhraseHighlight(int start, int end) { + this.lineStart = start; + this.lineEnd = end; + } +} diff --git a/src/org/opensolaris/opengrok/search/context/PhraseHighlightComparator.java b/src/org/opensolaris/opengrok/search/context/PhraseHighlightComparator.java new file mode 100644 index 00000000000..2a934ecb22e --- /dev/null +++ b/src/org/opensolaris/opengrok/search/context/PhraseHighlightComparator.java @@ -0,0 +1,58 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * See LICENSE.txt included in this distribution for the specific + * language governing permissions and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at LICENSE.txt. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright (c) 2018, Chris Fraire . + */ + +package org.opensolaris.opengrok.search.context; + +import java.util.Comparator; + +/** + * Represents a {@link Comparator} for {@link PhraseHighlight}. + */ +public class PhraseHighlightComparator implements Comparator { + + public static final PhraseHighlightComparator INSTANCE = + new PhraseHighlightComparator(); + + @Override + public int compare(PhraseHighlight o1, PhraseHighlight o2) { + // ASC by lineStart, with -1 == -Inf. 
+ if (o1.getLineStart() < 0) { + if (o2.getLineStart() >= 0) { + return -1; + } + } else if (o2.getLineStart() < 0) { + return 1; + } + int cmp = Integer.compare(o1.getLineStart(), o2.getLineStart()); + if (cmp != 0) { + return cmp; + } + // DESC by lineEnd, with -1 == Inf. + cmp = Integer.compare(o2.getLineEnd(), o1.getLineEnd()); + return cmp; + } + + /** private to enforce singleton */ + private PhraseHighlightComparator() { + } +} diff --git a/src/org/opensolaris/opengrok/search/context/StrictLineBreakIterator.java b/src/org/opensolaris/opengrok/search/context/StrictLineBreakIterator.java new file mode 100644 index 00000000000..76c4971f9f1 --- /dev/null +++ b/src/org/opensolaris/opengrok/search/context/StrictLineBreakIterator.java @@ -0,0 +1,205 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * See LICENSE.txt included in this distribution for the specific + * language governing permissions and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at LICENSE.txt. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright (c) 2018, Chris Fraire . + */ + +package org.opensolaris.opengrok.search.context; + +import java.text.BreakIterator; +import java.text.CharacterIterator; +import java.text.StringCharacterIterator; +import java.util.ArrayList; +import java.util.List; + +/** + * Represents a subclass of {@link BreakIterator} that breaks at standard + * OpenGrok EOL -- namely {@code \r\n}, {@code \n}, or {@code \r}. 
+ */ +public class StrictLineBreakIterator extends BreakIterator { + + private final List breaks = new ArrayList<>(); + private char peekChar = CharacterIterator.DONE; + private CharacterIterator charIt; + private int breakOffset = -1; + + public StrictLineBreakIterator() { + charIt = new StringCharacterIterator(""); + } + + @Override + public int first() { + breaks.clear(); + breakOffset = -1; + charIt.first(); + return 0; + } + + @Override + public int last() { + int c; + do { + c = current(); + } while (next() != BreakIterator.DONE); + return c; + } + + @Override + public int next(int n) { + if (n < 0) { + throw new IllegalArgumentException("n cannot be negative"); + } + + int noff = current(); + for (int i = 0; i < n; ++i) { + noff = next(); + if (noff == BreakIterator.DONE) { + return noff; + } + } + return noff; + } + + @Override + public int next() { + if (breakOffset + 1 < breaks.size()) { + return breaks.get(++breakOffset); + } + + char lastChar = CharacterIterator.DONE; + int charOff; + while (true) { + char nextChar; + if (peekChar != CharacterIterator.DONE) { + nextChar = peekChar; + peekChar = CharacterIterator.DONE; + } else { + nextChar = charIt.next(); + } + + switch (nextChar) { + case CharacterIterator.DONE: + if (lastChar != CharacterIterator.DONE) { + charOff = charIt.getIndex(); + breaks.add(charOff); + ++breakOffset; + return charOff; + } else { + return BreakIterator.DONE; + } + case '\n': + // charOff is just past the LF + charOff = charIt.getIndex() + 1; + breaks.add(charOff); + ++breakOffset; + return charOff; + case '\r': + charOff = charIt.getIndex() + 1; + peekChar = charIt.next(); + switch (peekChar) { + case '\n': + peekChar = CharacterIterator.DONE; + // charOff is just past the LF + ++charOff; + breaks.add(charOff); + ++breakOffset; + return charOff; + case CharacterIterator.DONE: + default: + breaks.add(charOff); + ++breakOffset; + return charOff; + } + default: + lastChar = nextChar; + break; + } + } + } + + @Override + public int 
previous() { + if (breakOffset >= 0) { + if (--breakOffset >= 0) { + return breaks.get(breakOffset); + } + return 0; + } + return BreakIterator.DONE; + } + + @Override + public int following(int offset) { + if (breaks.size() > 0 && breaks.get(breaks.size() - 1) > offset) { + int lo = 0; + int hi = breaks.size() - 1; + int mid; + while (lo <= hi) { + mid = lo + (hi - lo) / 2; + int boff = breaks.get(mid); + if (offset < boff) { + if (mid < 1 || offset >= breaks.get(mid - 1)) { + return boff; + } else { + hi = mid - 1; + } + } else { + lo = mid + 1; + } + } + // This should not be reached. + return BreakIterator.DONE; + } + + int noff = BreakIterator.DONE; + do { + noff = next(); + if (noff > offset) { + return noff; + } + } while (noff != BreakIterator.DONE); + return noff; + } + + @Override + public int current() { + if (breakOffset < 0) { + return 0; + } + return breakOffset < breaks.size() ? breaks.get(breakOffset) : + charIt.current(); + } + + @Override + public CharacterIterator getText() { + return (CharacterIterator)charIt.clone(); + } + + @Override + public void setText(CharacterIterator newText) { + if (newText == null) { + throw new IllegalArgumentException("newText is null"); + } + this.charIt = newText; + this.breaks.clear(); + this.peekChar = newText.current(); + this.breakOffset = -1; + } +} diff --git a/src/org/opensolaris/opengrok/web/HtmlConsts.java b/src/org/opensolaris/opengrok/web/HtmlConsts.java index 1ce0cc37dee..eab457ea0f9 100644 --- a/src/org/opensolaris/opengrok/web/HtmlConsts.java +++ b/src/org/opensolaris/opengrok/web/HtmlConsts.java @@ -18,7 +18,7 @@ */ /* - * Copyright (c) 2017, Chris Fraire . + * Copyright (c) 2017-2018, Chris Fraire . 
*/ package org.opensolaris.opengrok.web; @@ -28,6 +28,7 @@ */ public class HtmlConsts { public static final String SPAN_A = ""; + public static final String SPAN_B = ""; public static final String SPAN_C = ""; public static final String SPAN_D = ""; public static final String SPAN_N = ""; @@ -44,4 +45,10 @@ public class HtmlConsts { public static final String NUMBER_CLASS = "n"; public static final String STRING_CLASS = "s"; + public static final String B = ""; + public static final String ZB = ""; + + public static final String BR = "
"; + + public static final String HELLIP = "…"; } diff --git a/src/org/opensolaris/opengrok/web/SearchHelper.java b/src/org/opensolaris/opengrok/web/SearchHelper.java index 0ec947722e3..029c22f3977 100644 --- a/src/org/opensolaris/opengrok/web/SearchHelper.java +++ b/src/org/opensolaris/opengrok/web/SearchHelper.java @@ -20,7 +20,7 @@ /* * Copyright (c) 2011, 2017, Oracle and/or its affiliates. All rights reserved. * Portions copyright (c) 2011 Jens Elkner. - * Portions Copyright (c) 2017, Chris Fraire . + * Portions Copyright (c) 2017-2018, Chris Fraire . */ package org.opensolaris.opengrok.web; @@ -28,6 +28,7 @@ import java.io.FileNotFoundException; import java.io.IOException; import java.util.ArrayList; +import java.util.HashMap; import java.util.List; import java.util.Map; import java.util.Set; @@ -40,7 +41,6 @@ import org.apache.lucene.document.Document; import org.apache.lucene.index.DirectoryReader; import org.apache.lucene.index.IndexReader; -import org.apache.lucene.index.MultiReader; import org.apache.lucene.index.Term; import org.apache.lucene.queryparser.classic.ParseException; import org.apache.lucene.search.BooleanQuery; @@ -50,6 +50,7 @@ import org.apache.lucene.search.Sort; import org.apache.lucene.search.SortField; import org.apache.lucene.search.TermQuery; +import org.apache.lucene.search.TopDocs; import org.apache.lucene.search.TopFieldDocs; import org.apache.lucene.search.spell.DirectSpellChecker; import org.apache.lucene.search.spell.SuggestMode; @@ -61,12 +62,15 @@ import org.opensolaris.opengrok.configuration.Project; import org.opensolaris.opengrok.configuration.RuntimeEnvironment; import org.opensolaris.opengrok.configuration.SuperIndexSearcher; +import org.opensolaris.opengrok.index.IndexAnalysisSettings; +import org.opensolaris.opengrok.index.IndexAnalysisSettingsAccessor; import org.opensolaris.opengrok.index.IndexDatabase; import org.opensolaris.opengrok.logger.LoggerFactory; import org.opensolaris.opengrok.search.QueryBuilder; import 
org.opensolaris.opengrok.search.Summarizer; import org.opensolaris.opengrok.search.context.Context; import org.opensolaris.opengrok.search.context.HistoryContext; +import org.opensolaris.opengrok.util.ForbiddenSymlinkException; import org.opensolaris.opengrok.util.IOUtils; /** @@ -145,6 +149,11 @@ public class SearchHelper { * {@link #prepareExec(SortedSet)} and {@link #executeQuery()}. */ public String errorMsg; + /** + * the reader used to open the index. Automatically set via + * {@link #prepareExec(SortedSet)}. + */ + private IndexReader reader; /** * the searcher used to open/search the index. Automatically set via * {@link #prepareExec(SortedSet)}. @@ -169,7 +178,7 @@ public class SearchHelper { */ public long totalHits; /** - * the query created by the used {@link QueryBuilder} via + * the query created by {@link #builder} via * {@link #prepareExec(SortedSet)}. */ public Query query; @@ -205,6 +214,11 @@ public class SearchHelper { */ public static final String PARSE_ERROR_MSG = "Unable to parse your query: "; + /** + * Key is Project name or empty string for null Project + */ + private Map mappedAnalysisSettings; + /** * User readable description for file types. Only those listed in * fileTypeDescription will be shown to the user. 
@@ -246,6 +260,7 @@ public SearchHelper prepareExec(SortedSet projects) { return this; } + mappedAnalysisSettings = null; // the Query created by the QueryBuilder try { indexDir = new File(dataRoot, IndexDatabase.INDEX_DIR); @@ -258,7 +273,8 @@ public SearchHelper prepareExec(SortedSet projects) { if (projects.isEmpty()) { // no project setup FSDirectory dir = FSDirectory.open(indexDir.toPath()); - searcher = new IndexSearcher(DirectoryReader.open(dir)); + reader = DirectoryReader.open(dir); + searcher = new IndexSearcher(reader); closeOnDestroy = true; } else { // Check list of project names first to make sure all of them @@ -288,10 +304,10 @@ public SearchHelper prepareExec(SortedSet projects) { // We use MultiReader even for single project. This should // not matter given that MultiReader is just a cheap wrapper // around set of IndexReader objects. - MultiReader multireader = RuntimeEnvironment.getInstance(). - getMultiReader(projects, searcherList); - if (multireader != null) { - searcher = new IndexSearcher(multireader); + reader = RuntimeEnvironment.getInstance().getMultiReader( + projects, searcherList); + if (reader != null) { + searcher = new IndexSearcher(reader); } else { errorMsg = "Failed to initialize search. Check the index."; return this; @@ -520,7 +536,7 @@ public SearchHelper prepareSummary() { return this; } try { - sourceContext = new Context(query, builder.getQueries()); + sourceContext = new Context(query, builder); summarizer = new Summarizer(query, new CompatibleAnalyser()); } catch (Exception e) { LOGGER.log(Level.WARNING, "Summarizer: {0}", e.getMessage()); @@ -550,4 +566,102 @@ public void destroy() { } } } -} \ No newline at end of file + + /** + * Searches for a document for a single file from the index. 
+ * @param file the file whose definitions to find + * @return {@link ScoreDoc#doc} or -1 if it could not be found + * @throws IOException if an error happens when accessing the index + * @throws ParseException if an error happens when building the Lucene query + */ + public int searchSingle(File file) throws IOException, + ParseException { + + RuntimeEnvironment env = RuntimeEnvironment.getInstance(); + String path; + try { + path = env.getPathRelativeToSourceRoot(file); + } catch (ForbiddenSymlinkException e) { + LOGGER.log(Level.FINER, e.getMessage()); + return -1; + } + //sanitize windows path delimiters + //in order not to conflict with Lucene escape character + path = path.replace("\\", "/"); + + QueryBuilder singleBuilder = new QueryBuilder(); + if (builder != null) { + singleBuilder.reset(builder); + } + query = singleBuilder.setPath(path).build(); + + TopDocs top = searcher.search(query, 1); + if (top.totalHits == 0) { + return -1; + } + + int docID = top.scoreDocs[0].doc; + Document doc = searcher.doc(docID); + + String foundPath = doc.get(QueryBuilder.PATH); + // Only use the result if PATH matches exactly. + if (!path.equals(foundPath)) { + return -1; + } + + return docID; + } + + /** + * Gets the persisted tabSize via {@link #getSettings(java.lang.String)} if + * available or returns the {@code proj} tabSize if available -- or zero. + * @param proj a defined instance or {@code null} if no project is active + * @return tabSize + * @throws IOException if an I/O error occurs querying the active reader + */ + public int getTabSize(Project proj) throws IOException { + String projectName = proj != null ? proj.getName() : null; + IndexAnalysisSettings settings = getSettings(projectName); + int tabSize; + if (settings != null && settings.getTabSize() != null) { + tabSize = settings.getTabSize(); + } else { + tabSize = proj != null ? 
proj.getTabSize() : 0; + } + return tabSize; + } + + /** + * Gets the settings for a specified project, querying the active reader + * upon the first call after {@link #prepareExec(java.util.SortedSet)}. + * @param projectName a defined instance or {@code null} if no project is + * active (or empty string to mean the same thing) + * @return a defined instance or {@code null} if none is found + * @throws IOException if an I/O error occurs querying the active reader + */ + public IndexAnalysisSettings getSettings(String projectName) + throws IOException { + if (mappedAnalysisSettings == null) { + IndexAnalysisSettingsAccessor dao = + new IndexAnalysisSettingsAccessor(); + IndexAnalysisSettings[] setts = dao.read(reader, Short.MAX_VALUE); + mappedAnalysisSettings = map(setts); + } + + String k = projectName != null ? projectName : ""; + return mappedAnalysisSettings.get(k); + } + + private Map map( + IndexAnalysisSettings[] setts) { + + Map res = new HashMap<>(); + for (int i = 0; i < setts.length; ++i) { + IndexAnalysisSettings settings = setts[i]; + String k = settings.getProjectName() != null ? 
+ settings.getProjectName() : ""; + res.put(k, settings); + } + return res; + } +} diff --git a/test/org/opensolaris/opengrok/analysis/plain/DefinitionsTokenStreamTest.java b/test/org/opensolaris/opengrok/analysis/plain/DefinitionsTokenStreamTest.java index 6543ee3dbf7..ce03c225f1b 100644 --- a/test/org/opensolaris/opengrok/analysis/plain/DefinitionsTokenStreamTest.java +++ b/test/org/opensolaris/opengrok/analysis/plain/DefinitionsTokenStreamTest.java @@ -116,7 +116,7 @@ private void testDefinitionsVsContent(boolean expandTabs, String source; try (Reader rdr = ExpandTabsReader.wrap( IOUtils.createBOMStrippedReader(src.getStream(), - StandardCharsets.UTF_8.name()), tabSize)) { + StandardCharsets.UTF_8.name()), tabSize)) { int c; while ((c = rdr.read()) != -1) { bld.append((char)c); @@ -158,8 +158,8 @@ private void testDefinitionsVsContent(boolean expandTabs, boolean cutContainsTerm = cutValue.endsWith(termValue); assertTrue("cut term" + count + " at " + - (offs.startOffset()) + "-" + (offs.endOffset()) + "[" - + cutValue + "] vs [" + termValue + "]", cutContainsTerm); + (offs.startOffset()) + "-" + (offs.endOffset()) + "[" + + cutValue + "] vs [" + termValue + "]", cutContainsTerm); } assertEquals("token count", expectedCount, count); diff --git a/test/org/opensolaris/opengrok/search/context/ContextFormatterTest.java b/test/org/opensolaris/opengrok/search/context/ContextFormatterTest.java new file mode 100644 index 00000000000..821cbb39bf3 --- /dev/null +++ b/test/org/opensolaris/opengrok/search/context/ContextFormatterTest.java @@ -0,0 +1,196 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * See LICENSE.txt included in this distribution for the specific + * language governing permissions and limitations under the License. 
+ * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at LICENSE.txt. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright (c) 2018, Chris Fraire . + */ + +package org.opensolaris.opengrok.search.context; + +import org.apache.lucene.search.uhighlight.Passage; +import org.apache.lucene.util.BytesRef; +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertNotNull; +import static org.junit.Assert.assertTrue; +import org.junit.Test; +import static org.opensolaris.opengrok.util.CustomAssertions.assertLinesEqual; + +/** + * Represents a container for tests of {@link ContextFormatter}. + */ +public class ContextFormatterTest { + + private static final String DOC = + " Lorem ipsum dolor sit amet, consectetur adipiscing elit.\n" + + "Mauris vel tortor vel nisl efficitur fermentum nec vel erat.\n" + + "Mauris diam nisl, tincidunt nec gravida sit amet, efficitur vitae\n" + + "est. Sed aliquam non mi vel mattis:\n" + + "\n" + + " Maecenas vitae lacus velit varius vulputate ipsum sed laoreet. Nam maximus libero non ornare egestas. Aenean dignissim ipsum eu rhoncus ultricies.\n" + + "\n" + + " Fusce pretium hendrerit dictum. 
Pellentesque habitant\n" + + "morbi tristique senectus et netus."; + + private static final String DOC2 = + "abc\n" + + "def\n" + + "ghi"; + + @Test + public void testLineMatchFormatted() { + final String WORD = "gravida"; + int woff = DOC.indexOf(WORD); + assertTrue(WORD, woff >= 0); + + Passage p = new Passage(); + p.setStartOffset(woff); + p.setEndOffset(woff + WORD.length()); + p.addMatch(woff, p.getEndOffset(), new BytesRef(WORD)); + assertEquals("getNumMatches()", 1, p.getNumMatches()); + + // First, test with contextCount==0 + ContextArgs args = new ContextArgs((short)0, (short)10); + ContextFormatter fmt = new ContextFormatter(args); + fmt.setUrl("http://example.com"); + Object res = fmt.format(new Passage[] {p}, DOC); + assertNotNull("format() result", res); + + final String DOCCTX_0 = + "" + + "3 Mauris diam nisl, tincidunt nec gravida sit" + + " amet, efficitur vitae
\n"; + String ctx = res.toString(); + assertLinesEqual("format().toString()", DOCCTX_0, ctx); + + // Second, test with contextCount==1 + args = new ContextArgs((short)1, (short)10); + fmt = new ContextFormatter(args); + fmt.setUrl("http://example.com"); + res = fmt.format(new Passage[] {p}, DOC); + assertNotNull("format() result", res); + + final String DOCCTX_1 = + "" + + "2 Mauris vel tortor vel nisl efficitur fermentum nec vel" + + " erat.
" + + "" + + "3 Mauris diam nisl, tincidunt nec gravida sit" + + " amet, efficitur vitae
" + + "" + + "4 est. Sed aliquam non mi vel mattis:
"; + ctx = res.toString(); + assertLinesEqual("format().toString()", DOCCTX_1, ctx); + } + + @Test + public void testLinesSpanningMatchFormatted() { + Passage p = new Passage(); + p.setStartOffset(0); + p.setEndOffset(DOC2.length()); + p.addMatch(0, p.getEndOffset(), new BytesRef(DOC2)); + assertEquals("getNumMatches()", 1, p.getNumMatches()); + + /** + * We're using the entire document, but see how it behaves with + * contextCount==1 + */ + ContextArgs args = new ContextArgs((short)1, (short)10); + ContextFormatter fmt = new ContextFormatter(args); + fmt.setUrl("http://example.com"); + Object res = fmt.format(new Passage[] {p}, DOC2); + assertNotNull("format() result", res); + + final String DOC2CTX = + "" + + "1 abc
" + + "" + + "2 def
" + + "" + + "3 ghi
"; + String ctx = res.toString(); + assertLinesEqual("format().toString()", DOC2CTX, ctx); + } + + @Test + public void testElidedMatchFormatted() { + final String WORD = "dignissim"; + int woff = DOC.indexOf(WORD); + assertTrue(WORD, woff >= 0); + + Passage p = new Passage(); + p.setStartOffset(woff); + p.setEndOffset(woff + WORD.length()); + p.addMatch(woff, p.getEndOffset(), new BytesRef(WORD)); + assertEquals("getNumMatches()", 1, p.getNumMatches()); + + // First, test with contextCount==0 + ContextArgs args = new ContextArgs((short)0, (short)10); + ContextFormatter fmt = new ContextFormatter(args); + fmt.setUrl("http://example.com"); + Object res = fmt.format(new Passage[] {p}, DOC); + assertNotNull("format() result", res); + + final String DOCCTX_0 = + "" + + "6 …putate ipsum sed laoreet. Nam maximus libero" + + " non ornare egestas. Aenean dignissim ipsum eu" + + " rhoncus…
\n"; + String ctx = res.toString(); + assertLinesEqual("format().toString()", DOCCTX_0, ctx); + + // Second, test with contextCount==1 + args = new ContextArgs((short)1, (short)10); + fmt = new ContextFormatter(args); + fmt.setUrl("http://example.com"); + res = fmt.format(new Passage[] {p}, DOC); + assertNotNull("format() result", res); + + final String DOCCTX_1 = + "" + + "5
" + + "" + + "6 …putate ipsum sed laoreet. Nam maximus libero" + + " non ornare egestas. Aenean dignissim ipsum eu" + + " rhoncus…
" + + "" + + "7
"; + ctx = res.toString(); + assertLinesEqual("format().toString()", DOCCTX_1, ctx); + + // Third, test with contextCount==1 and a line limit + args = new ContextArgs((short)1, (short)10); + fmt = new ContextFormatter(args); + fmt.setUrl("http://example.com"); + fmt.setMoreLimit(2); + fmt.setMoreUrl("http://example.com/more"); + res = fmt.format(new Passage[] {p}, DOC); + assertNotNull("format() result", res); + + final String DOCCTX_2M = + "" + + "5
" + + "" + + "6 …putate ipsum sed laoreet. Nam maximus libero" + + " non ornare egestas. Aenean dignissim ipsum eu" + + " rhoncus…
" + + "[all …]
"; + ctx = res.toString(); + assertLinesEqual("format().toString()", DOCCTX_2M, ctx); + } +} diff --git a/test/org/opensolaris/opengrok/search/context/ContextTest.java b/test/org/opensolaris/opengrok/search/context/ContextTest.java index a66efce5880..5cfe6be3e0c 100644 --- a/test/org/opensolaris/opengrok/search/context/ContextTest.java +++ b/test/org/opensolaris/opengrok/search/context/ContextTest.java @@ -83,32 +83,32 @@ public void testIsEmpty() throws ParseException { // Definition search should be used QueryBuilder qb = new QueryBuilder().setDefs(term); - Context c = new Context(qb.build(), qb.getQueries()); + Context c = new Context(qb.build(), qb); assertFalse(c.isEmpty()); // Symbol search should be used qb = new QueryBuilder().setRefs(term); - c = new Context(qb.build(), qb.getQueries()); + c = new Context(qb.build(), qb); assertFalse(c.isEmpty()); // Full search should be used qb = new QueryBuilder().setFreetext(term); - c = new Context(qb.build(), qb.getQueries()); + c = new Context(qb.build(), qb); assertFalse(c.isEmpty()); // History search should not be used qb = new QueryBuilder().setHist(term); - c = new Context(qb.build(), qb.getQueries()); + c = new Context(qb.build(), qb); assertTrue(c.isEmpty()); // Path search should not be used qb = new QueryBuilder().setPath(term); - c = new Context(qb.build(), qb.getQueries()); + c = new Context(qb.build(), qb); assertTrue(c.isEmpty()); // Combined search should be fine qb = new QueryBuilder().setHist(term).setFreetext(term); - c = new Context(qb.build(), qb.getQueries()); + c = new Context(qb.build(), qb); assertFalse(c.isEmpty()); } @@ -142,7 +142,7 @@ private void testGetContext(boolean limit, boolean hitList) // Search freetext for the term "def" QueryBuilder qb = new QueryBuilder().setFreetext("def"); - Context c = new Context(qb.build(), qb.getQueries()); + Context c = new Context(qb.build(), qb); assertTrue(c.getContext(in, out, "", "", "", null, limit, qb.isDefSearch(), hits)); if (hitList) { @@ 
-166,7 +166,7 @@ private void testGetContext(boolean limit, boolean hitList) out = hitList ? null : new StringWriter(); hits = hitList ? new ArrayList<>() : null; qb = new QueryBuilder().setDefs("def"); - c = new Context(qb.build(), qb.getQueries()); + c = new Context(qb.build(), qb); assertTrue(c.getContext(in, out, "", "", "", defs, limit, qb.isDefSearch(), hits)); if (hitList) { @@ -200,7 +200,7 @@ private void testGetContext(boolean limit, boolean hitList) out = hitList ? null : new StringWriter(); hits = hitList ? new ArrayList<>() : null; qb = new QueryBuilder().setDefs("def"); - c = new Context(qb.build(), qb.getQueries()); + c = new Context(qb.build(), qb); assertTrue(c.getContext(in, out, "", "", "", defs, limit, qb.isDefSearch(), hits)); if (hitList) { @@ -221,7 +221,7 @@ private void testGetContext(boolean limit, boolean hitList) out = hitList ? null : new StringWriter(); hits = hitList ? new ArrayList<>() : null; qb = new QueryBuilder().setDefs("def"); - c = new Context(qb.build(), qb.getQueries()); + c = new Context(qb.build(), qb); assertTrue(c.getContext(in, out, "", "", "", defs, limit, qb.isDefSearch(), hits)); if (hitList) { @@ -241,7 +241,7 @@ private void testGetContext(boolean limit, boolean hitList) out = hitList ? null : new StringWriter(); hits = hitList ? new ArrayList<>() : null; qb = new QueryBuilder().setFreetext("no_match"); - c = new Context(qb.build(), qb.getQueries()); + c = new Context(qb.build(), qb); assertFalse(c.getContext(in, out, "", "", "", null, limit, qb.isDefSearch(), hits)); if (hitList) { assertEquals(0, hits.size()); @@ -254,7 +254,7 @@ private void testGetContext(boolean limit, boolean hitList) out = hitList ? null : new StringWriter(); hits = hitList ? 
new ArrayList<>() : null; qb = new QueryBuilder().setHist("abc"); - c = new Context(qb.build(), qb.getQueries()); + c = new Context(qb.build(), qb); assertFalse(c.getContext(in, out, "", "", "", null, limit, qb.isDefSearch(), hits)); if (hitList) { assertEquals(0, hits.size()); @@ -280,7 +280,7 @@ public void testLongLineNearBufferBoundary() throws ParseException { substring.length); Reader in = new CharArrayReader(chars); QueryBuilder qb = new QueryBuilder().setFreetext("test"); - Context c = new Context(qb.build(), qb.getQueries()); + Context c = new Context(qb.build(), qb); StringWriter out = new StringWriter(); boolean match = c.getContext(in, out, "", "", "", null, true, qb.isDefSearch(), null); @@ -310,7 +310,7 @@ public void testAllLinkWithLongLines() throws ParseException { StringWriter out = new StringWriter(); QueryBuilder qb = new QueryBuilder().setFreetext("search_for_me"); - Context c = new Context(qb.build(), qb.getQueries()); + Context c = new Context(qb.build(), qb); boolean match = c.getContext(in, out, "", "", "", null, true, qb.isDefSearch(), null); @@ -339,7 +339,7 @@ public void testLongTruncatedLine() throws ParseException { StringWriter out = new StringWriter(); QueryBuilder qb = new QueryBuilder().setFreetext("search_for_me"); - Context c = new Context(qb.build(), qb.getQueries()); + Context c = new Context(qb.build(), qb); boolean match = c.getContext(in, out, "", "", "", null, true, qb.isDefSearch(), null); @@ -369,7 +369,7 @@ public void testMultiLineMatch() throws Exception { // input file. The generated HTML fragment is inserted inside a root // element so that the StringWriter contains a valid XML document. 
QueryBuilder qb = new QueryBuilder().setFreetext("\"a b c\""); - Context c = new Context(qb.build(), qb.getQueries()); + Context c = new Context(qb.build(), qb); assertTrue( "No match found", c.getContext(in, out, "", "", "", null, true, qb.isDefSearch(), null)); @@ -408,7 +408,7 @@ public void bug16848() throws Exception { StringReader in = new StringReader("Mixed case: abc AbC dEf\n"); StringWriter out = new StringWriter(); QueryBuilder qb = new QueryBuilder().setFreetext("mixed"); - Context c = new Context(qb.build(), qb.getQueries()); + Context c = new Context(qb.build(), qb); assertTrue(c.getContext(in, out, "", "", "", null, false, qb.isDefSearch(), null)); assertEquals("1 " + "Mixed case: abc AbC dEf
", @@ -465,7 +465,7 @@ private void bug17582(QueryBuilder builder, int[] lines, String[] tags) defs.addTag(2, "bug17582", "type1", "text1", 0, 0); defs.addTag(3, "Bug17582", "type2", "text2", 0, 0); - Context context = new Context(builder.build(), builder.getQueries()); + Context context = new Context(builder.build(), builder); ArrayList hits = new ArrayList<>(); assertEquals(lines.length != 0, context.getContext(in, null, "", "", "", defs, false, builder.isDefSearch(), hits)); @@ -506,7 +506,7 @@ private void searchContextTestHelper(String searchInText, String queryString, St StringWriter out = new StringWriter(); QueryBuilder qb = new QueryBuilder().setFreetext(queryString); - Context c = new Context(qb.build(), qb.getQueries()); + Context c = new Context(qb.build(), qb); boolean match = c.getContext(in, out, "", "", "", null, true, qb.isDefSearch(), null); diff --git a/test/org/opensolaris/opengrok/search/context/PassageConverterTest.java b/test/org/opensolaris/opengrok/search/context/PassageConverterTest.java new file mode 100644 index 00000000000..33f6673bc0a --- /dev/null +++ b/test/org/opensolaris/opengrok/search/context/PassageConverterTest.java @@ -0,0 +1,234 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * See LICENSE.txt included in this distribution for the specific + * language governing permissions and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at LICENSE.txt. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright (c) 2018, Chris Fraire . 
+ */ + +package org.opensolaris.opengrok.search.context; + +import java.util.SortedMap; +import org.apache.lucene.search.uhighlight.Passage; +import org.apache.lucene.util.BytesRef; +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertNotNull; +import static org.junit.Assert.assertTrue; +import org.junit.BeforeClass; +import org.junit.Test; +import org.opensolaris.opengrok.util.SourceSplitter; + +/** + * Represents a container for tests of {@link PassageConverter} etc. + */ +public class PassageConverterTest { + + private static final String DOC = + " Lorem ipsum dolor sit amet, consectetur adipiscing elit.\n" + + "Mauris vel tortor vel nisl efficitur fermentum nec vel erat.\n" + + "Mauris diam nisl, tincidunt nec gravida sit amet, efficitur vitae\n" + + "est. Sed aliquam non mi vel mattis:\n" + + "\n" + + " Maecenas vitae lacus velit varius vulputate ipsum sed laoreet. Nam maximus libero non ornare egestas. Aenean dignissim ipsum eu rhoncus ultricies.\n" + + "\n" + + " Fusce pretium hendrerit dictum. 
Pellentesque habitant\n" + + "morbi tristique senectus et netus."; + + private static final String DOC2 = + "abc\n" + + "def\n" + + "ghi"; + + private static SourceSplitter splitter; + private static SourceSplitter splitter2; + + @BeforeClass + public static void setUpClass() throws Exception { + splitter = new SourceSplitter(); + splitter.reset(DOC); + splitter2 = new SourceSplitter(); + splitter2.reset(DOC2); + } + + @Test + public void testOneWord() { + final String WORD = "gravida"; + int woff = DOC.indexOf(WORD); + assertTrue(WORD, woff >= 0); + + Passage p = new Passage(); + p.setStartOffset(woff); + p.setEndOffset(woff + WORD.length()); + p.addMatch(woff, p.getEndOffset(), new BytesRef(WORD)); + assertEquals("getNumMatches()", 1, p.getNumMatches()); + + PassageConverter cvt = getConverter((short)0); + SortedMap linemap = + cvt.convert(new Passage[] {p}, splitter); + + assertEquals("linemap size()", 1, linemap.size()); + int lineno = linemap.firstKey(); + assertEquals("lineno", 2, lineno); + + LineHighlight lhi = linemap.get(lineno); + assertNotNull("get LineHighlight", lhi); + assertEquals("getLelide()", 0, lhi.getLelide()); + assertEquals("getRelide()", 0, lhi.getRelide()); + assertEquals("countMarkups()", 1, lhi.countMarkups()); + + PhraseHighlight phi = lhi.getMarkup(0); + assertNotNull("get PhraseHighlight", phi); + assertEquals("getLineStart()", 32, phi.getLineStart()); + assertEquals("getLineEnd()", 32 + WORD.length(), phi.getLineEnd()); + } + + @Test + public void testOneWordElided() { + final String WORD = "dignissim"; + int woff = DOC.indexOf(WORD); + assertTrue(WORD, woff >= 0); + + Passage p = new Passage(); + p.setStartOffset(woff); + p.setEndOffset(woff + WORD.length()); + p.addMatch(woff, p.getEndOffset(), new BytesRef(WORD)); + assertEquals("getNumMatches()", 1, p.getNumMatches()); + + PassageConverter cvt = getConverter((short)0); + SortedMap linemap = + cvt.convert(new Passage[] {p}, splitter); + + assertEquals("linemap size()", 1, 
linemap.size()); + int lineno = linemap.firstKey(); + assertEquals("lineno", 5, lineno); + + LineHighlight lhi = linemap.get(lineno); + assertNotNull("get LineHighlight", lhi); + assertEquals("getLelide()", 41, lhi.getLelide()); + assertEquals("getRelide()", 139, lhi.getRelide()); + assertEquals("getRelide()", 139 - 41, cvt.getArgs().getContextWidth() - + 2); + assertEquals("countMarkups()", 1, lhi.countMarkups()); + + PhraseHighlight phi = lhi.getMarkup(0); + assertNotNull("get PhraseHighlight", phi); + assertEquals("getLineStart()", 113, phi.getLineStart()); + assertEquals("getLineEnd()", 113 + WORD.length(), phi.getLineEnd()); + } + + @Test + public void testLineSpanningMatch() { + final String PHRASE = "elit.\nMauris"; + int poff = DOC.indexOf(PHRASE); + assertTrue(PHRASE, poff >= 0); + + Passage p = new Passage(); + p.setStartOffset(poff); + p.setEndOffset(poff + PHRASE.length()); + p.addMatch(poff, p.getEndOffset(), new BytesRef(PHRASE)); + assertEquals("getNumMatches()", 1, p.getNumMatches()); + + PassageConverter cvt = getConverter((short)0); + SortedMap linemap = + cvt.convert(new Passage[] {p}, splitter); + + assertEquals("linemap size()", 2, linemap.size()); + int lineno = linemap.firstKey(); + assertEquals("first lineno", 0, lineno); + assertTrue("linemap[1] exists", linemap.containsKey(1)); + + LineHighlight lhi = linemap.get(lineno); + assertNotNull("get LineHighlight", lhi); + assertEquals("getLelide()", 0, lhi.getLelide()); + assertEquals("getRelide()", 0, lhi.getRelide()); + assertEquals("countMarkups()", 1, lhi.countMarkups()); + + PhraseHighlight phi = lhi.getMarkup(0); + assertNotNull("get PhraseHighlight", phi); + assertEquals("getLineStart()", 55, phi.getLineStart()); + assertEquals("getLineEnd()", Integer.MAX_VALUE, phi.getLineEnd()); + + lhi = linemap.get(lineno + 1); + assertNotNull("get LineHighlight", lhi); + assertEquals("getLelide()", 0, lhi.getLelide()); + assertEquals("getRelide()", 0, lhi.getRelide()); + 
assertEquals("countMarkups()", 1, lhi.countMarkups()); + + phi = lhi.getMarkup(0); + assertNotNull("get PhraseHighlight", phi); + assertEquals("getLineStart()", -1, phi.getLineStart()); + assertEquals("getLineEnd()", 6, phi.getLineEnd()); + } + + @Test + public void testLinesSpanningMatch() { + Passage p = new Passage(); + p.setStartOffset(0); + p.setEndOffset(DOC2.length()); + p.addMatch(0, p.getEndOffset(), new BytesRef(DOC2)); + assertEquals("getNumMatches()", 1, p.getNumMatches()); + + PassageConverter cvt = getConverter((short)0); + SortedMap linemap = + cvt.convert(new Passage[] {p}, splitter2); + + assertEquals("linemap size()", 3, linemap.size()); + int lineno = linemap.firstKey(); + assertEquals("first lineno", 0, lineno); + assertTrue("linemap[1] exists", linemap.containsKey(1)); + assertTrue("linemap[2] exists", linemap.containsKey(2)); + + LineHighlight lhi = linemap.get(lineno); + assertNotNull("get LineHighlight", lhi); + assertEquals("getLelide()", 0, lhi.getLelide()); + assertEquals("getRelide()", 0, lhi.getRelide()); + assertEquals("countMarkups()", 1, lhi.countMarkups()); + + PhraseHighlight phi = lhi.getMarkup(0); + assertNotNull("get PhraseHighlight", phi); + assertEquals("getLineStart()", 0, phi.getLineStart()); + assertEquals("getLineEnd()", Integer.MAX_VALUE, phi.getLineEnd()); + + lhi = linemap.get(lineno + 1); + assertNotNull("get LineHighlight", lhi); + assertEquals("getLelide()", 0, lhi.getLelide()); + assertEquals("getRelide()", 0, lhi.getRelide()); + assertEquals("countMarkups()", 1, lhi.countMarkups()); + + phi = lhi.getMarkup(0); + assertNotNull("get PhraseHighlight", phi); + assertEquals("getLineStart()", -1, phi.getLineStart()); + assertEquals("getLineEnd()", Integer.MAX_VALUE, phi.getLineEnd()); + + lhi = linemap.get(lineno + 2); + assertNotNull("get LineHighlight", lhi); + assertEquals("getLelide()", 0, lhi.getLelide()); + assertEquals("getRelide()", 0, lhi.getRelide()); + assertEquals("countMarkups()", 1, lhi.countMarkups()); + + 
phi = lhi.getMarkup(0); + assertNotNull("get PhraseHighlight", phi); + assertEquals("getLineStart()", -1, phi.getLineStart()); + assertEquals("getLineEnd()", 3, phi.getLineEnd()); + } + + private static PassageConverter getConverter(short contextCount) { + ContextArgs args = new ContextArgs(contextCount, (short)10); + return new PassageConverter(args); + } +} diff --git a/test/org/opensolaris/opengrok/search/context/SearchAndContextFormatterTest.java b/test/org/opensolaris/opengrok/search/context/SearchAndContextFormatterTest.java new file mode 100644 index 00000000000..cad4a3c309d --- /dev/null +++ b/test/org/opensolaris/opengrok/search/context/SearchAndContextFormatterTest.java @@ -0,0 +1,183 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * See LICENSE.txt included in this distribution for the specific + * language governing permissions and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at LICENSE.txt. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright (c) 2008, 2017, Oracle and/or its affiliates. All rights reserved. + * Portions Copyright (c) 2018, Chris Fraire . 
+ */ + +package org.opensolaris.opengrok.search.context; + +import java.io.File; +import java.io.IOException; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Map; +import java.util.TreeSet; +import org.apache.lucene.document.Document; +import org.apache.lucene.search.ScoreDoc; +import org.apache.lucene.search.highlight.InvalidTokenOffsetsException; +import org.junit.After; +import org.junit.AfterClass; +import static org.junit.Assert.assertNotNull; +import static org.junit.Assert.assertTrue; +import org.junit.Before; +import org.junit.BeforeClass; +import org.junit.Test; +import org.opensolaris.opengrok.analysis.FileAnalyzer; +import org.opensolaris.opengrok.analysis.plain.PlainAnalyzerFactory; +import org.opensolaris.opengrok.configuration.RuntimeEnvironment; +import org.opensolaris.opengrok.history.HistoryGuru; +import org.opensolaris.opengrok.index.Indexer; +import org.opensolaris.opengrok.util.TestRepository; +import org.opensolaris.opengrok.history.RepositoryFactory; +import org.opensolaris.opengrok.search.QueryBuilder; +import org.opensolaris.opengrok.search.SearchEngine; +import static org.opensolaris.opengrok.util.CustomAssertions.assertLinesEqual; + +/** + * Represents a container for tests of {@link SearchEngine} with + * {@link ContextFormatter} etc. + *

+ * Derived from Trond Norbye's {@code SearchEngineTest} + */ +public class SearchAndContextFormatterTest { + + private static RuntimeEnvironment env; + private static TestRepository repository; + private static File configFile; + private static boolean skip = false; + + @BeforeClass + public static void setUpClass() throws Exception { + repository = new TestRepository(); + repository.create(HistoryGuru.class.getResourceAsStream( + "repositories.zip")); + + env = RuntimeEnvironment.getInstance(); + env.setCtags(System.getProperty( + "org.opensolaris.opengrok.analysis.Ctags", "ctags")); + env.setSourceRoot(repository.getSourceRoot()); + env.setDataRoot(repository.getDataRoot()); + RepositoryFactory.initializeIgnoredNames(env); + + if (env.validateExuberantCtags()) { + env.setSourceRoot(repository.getSourceRoot()); + env.setDataRoot(repository.getDataRoot()); + env.setVerbose(false); + env.setHistoryEnabled(false); + Indexer.getInstance().prepareIndexer(env, true, true, + new TreeSet<>(Arrays.asList(new String[]{"/c"})), + false, false, null, null, new ArrayList<>(), false); + Indexer.getInstance().doIndexerExecution(true, null, null); + } else { + System.out.println( + "Skipping test. 
Could not find a ctags I could use in path."); + skip = true; + } + + configFile = File.createTempFile("configuration", ".xml"); + env.writeConfiguration(configFile); + RuntimeEnvironment.getInstance().readConfiguration(new File( + configFile.getAbsolutePath())); + } + + @AfterClass + public static void tearDownClass() throws Exception { + repository.destroy(); + configFile.delete(); + skip = false; + } + + @Before + public void setUp() { + } + + @After + public void tearDown() { + } + + @Test + public void testSearch() throws IOException, InvalidTokenOffsetsException { + if (skip) { + return; + } + + SearchEngine instance; + int noHits; + + instance = new SearchEngine(); + instance.setFreetext("embedded"); + instance.setFile("main.c"); + noHits = instance.search(); + assertTrue("noHits should be positive", noHits > 0); + String[] frags = getFirstFragments(instance); + assertNotNull("getFirstFragments() should return something", frags); + assertTrue("frags should have one element", frags.length == 1); + + final String CTX = + "9 /*
" + + "10 Multi line comment, with embedded strange characters: < > &,
" + + "11 email address: testuser@example.com and even an URL:
"; + assertLinesEqual("ContextFormatter output", CTX, frags[0]); + instance.destroy(); + } + + private String[] getFirstFragments(SearchEngine instance) + throws IOException, InvalidTokenOffsetsException { + + ContextArgs args = new ContextArgs((short)1, (short)10); + + /* + * The following `anz' should go unused, but UnifiedHighlighter demands + * an analyzer "even if in some circumstances it isn't used." + */ + PlainAnalyzerFactory fac = PlainAnalyzerFactory.DEFAULT_INSTANCE; + FileAnalyzer anz = fac.getAnalyzer(); + + ContextFormatter formatter = new ContextFormatter(args); + OGKUnifiedHighlighter uhi = new OGKUnifiedHighlighter(env, + instance.getSearcher(), anz); + uhi.setBreakIterator(() -> new StrictLineBreakIterator()); + uhi.setFormatter(formatter); + + ScoreDoc[] docs = instance.scoreDocs(); + for (int i = 0; i < docs.length; ++i) { + int docid = docs[i].doc; + Document doc = instance.doc(docid); + + String path = doc.get(QueryBuilder.PATH); + System.out.println(path); + formatter.setUrl("/source" + path); + + for (String contextField : + instance.getQueryBuilder().getContextFields()) { + + Map res = uhi.highlightFields( + new String[]{contextField}, instance.getQueryObject(), + new int[] {docid}, new int[] {10}); + String[] frags = res.getOrDefault(contextField, null); + if (frags != null) { + return frags; + } + } + } + return null; + } +} diff --git a/test/org/opensolaris/opengrok/search/context/SearchAndContextFormatterTest2.java b/test/org/opensolaris/opengrok/search/context/SearchAndContextFormatterTest2.java new file mode 100644 index 00000000000..566879dddd3 --- /dev/null +++ b/test/org/opensolaris/opengrok/search/context/SearchAndContextFormatterTest2.java @@ -0,0 +1,262 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. 
+ * + * See LICENSE.txt included in this distribution for the specific + * language governing permissions and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at LICENSE.txt. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright (c) 2008, 2017, Oracle and/or its affiliates. All rights reserved. + * Portions Copyright (c) 2018, Chris Fraire . + */ + +package org.opensolaris.opengrok.search.context; + +import java.io.File; +import java.io.IOException; +import java.nio.file.Files; +import java.nio.file.Paths; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.HashSet; +import java.util.List; +import java.util.Map; +import java.util.Set; +import java.util.TreeSet; +import org.apache.lucene.document.Document; +import org.apache.lucene.search.ScoreDoc; +import org.apache.lucene.search.highlight.InvalidTokenOffsetsException; +import org.junit.After; +import org.junit.AfterClass; +import static org.junit.Assert.assertNotNull; +import static org.junit.Assert.assertTrue; +import org.junit.Before; +import org.junit.BeforeClass; +import org.junit.Test; +import org.opensolaris.opengrok.analysis.FileAnalyzer; +import org.opensolaris.opengrok.analysis.plain.PlainAnalyzerFactory; +import org.opensolaris.opengrok.configuration.Project; +import org.opensolaris.opengrok.configuration.RuntimeEnvironment; +import org.opensolaris.opengrok.history.HistoryGuru; +import org.opensolaris.opengrok.index.Indexer; +import org.opensolaris.opengrok.util.TestRepository; +import org.opensolaris.opengrok.history.RepositoryFactory; +import org.opensolaris.opengrok.search.QueryBuilder; +import org.opensolaris.opengrok.search.SearchEngine; +import static 
org.opensolaris.opengrok.util.CustomAssertions.assertLinesEqual; +import org.opensolaris.opengrok.util.FileUtilities; +import org.opensolaris.opengrok.util.IOUtils; + +/** + * Represents a container for tests of {@link SearchEngine} with + * {@link ContextFormatter} etc. with a non-zero tab-size. + *

+ * Derived from Trond Norbye's {@code SearchEngineTest} + */ +public class SearchAndContextFormatterTest2 { + + private static final int TABSIZE = 8; + + private static final List TEMP_DIRS = new ArrayList<>(); + private static RuntimeEnvironment env; + private static TestRepository repository1; + private static TestRepository repository2; + private static File configFile; + private static boolean skip = false; + private static boolean originalProjectsEnabled; + + @BeforeClass + public static void setUpClass() throws Exception { + env = RuntimeEnvironment.getInstance(); + + originalProjectsEnabled = env.isProjectsEnabled(); + env.setProjectsEnabled(true); + + File sourceRoot = createTemporaryDirectory("srcroot"); + assertTrue("sourceRoot.isDirectory()", sourceRoot.isDirectory()); + File dataroot = createTemporaryDirectory("dataroot"); + assertTrue("dataroot.isDirectory()", dataroot.isDirectory()); + + repository1 = new TestRepository(); + repository1.create(HistoryGuru.class.getResourceAsStream( + "repositories.zip")); + + repository2 = new TestRepository(); + repository2.create(HistoryGuru.class.getResourceAsStream( + "repositories.zip")); + + // Create symlink #1 underneath source root. + final String SYMLINK1 = "symlink1"; + File symlink1 = new File(sourceRoot.getCanonicalFile(), SYMLINK1); + Files.createSymbolicLink(Paths.get(symlink1.getPath()), + Paths.get(repository1.getSourceRoot())); + assertTrue("symlink1.exists()", symlink1.exists()); + + // Create symlink #2 underneath source root. 
+ final String SYMLINK2 = "symlink2"; + File symlink2 = new File(sourceRoot.getCanonicalFile(), SYMLINK2); + Files.createSymbolicLink(Paths.get(symlink2.getPath()), + Paths.get(repository2.getSourceRoot())); + assertTrue("symlink2.exists()", symlink2.exists()); + + Set allowedSymlinks = new HashSet<>(); + allowedSymlinks.add(symlink1.getAbsolutePath()); + allowedSymlinks.add(symlink2.getAbsolutePath()); + env.setAllowedSymlinks(allowedSymlinks); + + env.setCtags(System.getProperty( + "org.opensolaris.opengrok.analysis.Ctags", "ctags")); + env.setSourceRoot(sourceRoot.getPath()); + env.setDataRoot(dataroot.getPath()); + RepositoryFactory.initializeIgnoredNames(env); + + if (env.validateExuberantCtags()) { + env.setVerbose(false); + env.setHistoryEnabled(false); + Indexer.getInstance().prepareIndexer(env, true, true, + new TreeSet<>(Arrays.asList(new String[]{"/c"})), + false, false, null, null, new ArrayList<>(), false); + + Project proj1 = env.getProjects().get(SYMLINK1); + assertNotNull("symlink1 project", proj1); + proj1.setTabSize(TABSIZE); + + Indexer.getInstance().doIndexerExecution(true, null, null); + } else { + System.out.println( + "Skipping test. 
Could not find a ctags I could use in path."); + skip = true; + } + + configFile = File.createTempFile("configuration", ".xml"); + env.writeConfiguration(configFile); + RuntimeEnvironment.getInstance().readConfiguration(new File( + configFile.getAbsolutePath())); + } + + @AfterClass + public static void tearDownClass() throws Exception { + env.setProjectsEnabled(originalProjectsEnabled); + env.setAllowedSymlinks(new HashSet<>()); + + if (repository1 != null) { + repository1.destroy(); + } + if (repository2 != null) { + repository2.destroy(); + } + if (configFile != null) { + configFile.delete(); + } + skip = false; + + try { + TEMP_DIRS.forEach((tempDir) -> { + try { + IOUtils.removeRecursive(tempDir.toPath()); + } catch (IOException e) { + // ignore + } + }); + } finally { + TEMP_DIRS.clear(); + } + } + + @Before + public void setUp() { + } + + @After + public void tearDown() { + } + + @Test + public void testSearch() throws IOException, InvalidTokenOffsetsException { + if (skip) { + return; + } + + SearchEngine instance; + int noHits; + + instance = new SearchEngine(); + instance.setFreetext("Hello"); + instance.setFile("renamed2.c"); + noHits = instance.search(); + assertTrue("noHits should be positive", noHits > 0); + String[] frags = getFirstFragments(instance); + assertNotNull("getFirstFragments() should return something", frags); + assertTrue("frags should have one element", frags.length == 1); + assertNotNull("frags[0] should be defined", frags[0]); + + final String CTX = + "16
" + + "17 printf ( "Hello, world!\\n" );
" + + "18
"; + assertLinesEqual("ContextFormatter output", CTX, frags[0]); + instance.destroy(); + } + + private String[] getFirstFragments(SearchEngine instance) + throws IOException, InvalidTokenOffsetsException { + + ContextArgs args = new ContextArgs((short)1, (short)10); + + /* + * The following `anz' should go unused, but UnifiedHighlighter demands + * an analyzer "even if in some circumstances it isn't used." + */ + PlainAnalyzerFactory fac = PlainAnalyzerFactory.DEFAULT_INSTANCE; + FileAnalyzer anz = fac.getAnalyzer(); + + ContextFormatter formatter = new ContextFormatter(args); + OGKUnifiedHighlighter uhi = new OGKUnifiedHighlighter(env, + instance.getSearcher(), anz); + uhi.setBreakIterator(() -> new StrictLineBreakIterator()); + uhi.setFormatter(formatter); + uhi.setTabSize(TABSIZE); + + ScoreDoc[] docs = instance.scoreDocs(); + for (int i = 0; i < docs.length; ++i) { + int docid = docs[i].doc; + Document doc = instance.doc(docid); + + String path = doc.get(QueryBuilder.PATH); + System.out.println(path); + formatter.setUrl("/source" + path); + + for (String contextField : + instance.getQueryBuilder().getContextFields()) { + + Map res = uhi.highlightFields( + new String[]{contextField}, instance.getQueryObject(), + new int[] {docid}, new int[] {10}); + String[] frags = res.getOrDefault(contextField, null); + if (frags != null) { + return frags; + } + } + } + return null; + } + + private static File createTemporaryDirectory(String name) + throws IOException { + File f = FileUtilities.createTemporaryDirectory(name); + TEMP_DIRS.add(f); + return f; + } +} diff --git a/test/org/opensolaris/opengrok/search/context/StrictLineBreakIteratorTest.java b/test/org/opensolaris/opengrok/search/context/StrictLineBreakIteratorTest.java new file mode 100644 index 00000000000..2e6314e30d9 --- /dev/null +++ b/test/org/opensolaris/opengrok/search/context/StrictLineBreakIteratorTest.java @@ -0,0 +1,161 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the 
terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * See LICENSE.txt included in this distribution for the specific + * language governing permissions and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at LICENSE.txt. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright (c) 2018, Chris Fraire . + */ + +package org.opensolaris.opengrok.search.context; + +import java.text.BreakIterator; +import static org.junit.Assert.assertEquals; +import org.junit.Test; + +/** + * Represents a container for tests of {@link StrictLineBreakIterator}. + */ +public class StrictLineBreakIteratorTest { + + @Test + public void testStandardLineBreakIteratorWithUnixLFs() { + final String DOC = "abc\ndef\nghi"; + BreakIterator it = BreakIterator.getLineInstance(); + it.setText(DOC); + + assertEquals("StrictLineBreakIterator current()", 0, it.current()); + assertEquals("StrictLineBreakIterator next()", 4, it.next()); + assertEquals("StrictLineBreakIterator next()", 8, it.next()); + assertEquals("StrictLineBreakIterator next()", 11, it.next()); + assertEquals("StrictLineBreakIterator next()", BreakIterator.DONE, + it.next()); + assertEquals("StrictLineBreakIterator current()", 11, it.current()); + } + + @Test + public void testBreakingWithUnixLFs1() { + final String DOC = "abc\ndef\nghi"; + StrictLineBreakIterator it = new StrictLineBreakIterator(); + it.setText(DOC); + + assertEquals("StrictLineBreakIterator current()", 0, it.current()); + assertEquals("StrictLineBreakIterator next()", 4, it.next()); + assertEquals("StrictLineBreakIterator next()", 8, it.next()); + 
assertEquals("StrictLineBreakIterator next()", 11, it.next()); + assertEquals("StrictLineBreakIterator next()", BreakIterator.DONE, + it.next()); + assertEquals("StrictLineBreakIterator current()", 11, it.current()); + } + + @Test + public void testBreakingWithUnixLFs2() { + final String DOC = "\nabc\ndef\nghi"; + StrictLineBreakIterator it = new StrictLineBreakIterator(); + it.setText(DOC); + + assertEquals("StrictLineBreakIterator current()", 0, it.current()); + assertEquals("StrictLineBreakIterator next()", 1, it.next()); + assertEquals("StrictLineBreakIterator next()", 5, it.next()); + assertEquals("StrictLineBreakIterator next()", 9, it.next()); + assertEquals("StrictLineBreakIterator next()", 12, it.next()); + assertEquals("StrictLineBreakIterator next()", BreakIterator.DONE, + it.next()); + assertEquals("StrictLineBreakIterator current()", 12, it.current()); + } + + @Test + public void testBreakingWithWindowsLFs() { + final String DOC = "abc\r\ndef\r\nghi"; + StrictLineBreakIterator it = new StrictLineBreakIterator(); + it.setText(DOC); + + assertEquals("StrictLineBreakIterator next()", 5, it.next()); + assertEquals("StrictLineBreakIterator next()", 10, it.next()); + assertEquals("StrictLineBreakIterator next()", 13, it.next()); + assertEquals("StrictLineBreakIterator next()", BreakIterator.DONE, + it.next()); + assertEquals("StrictLineBreakIterator current()", 13, it.current()); + } + + @Test + public void testBreakingWithMacLFs() { + final String DOC = "abc\rdef\rghi"; + StrictLineBreakIterator it = new StrictLineBreakIterator(); + it.setText(DOC); + + assertEquals("StrictLineBreakIterator next()", 4, it.next()); + assertEquals("StrictLineBreakIterator next()", 8, it.next()); + assertEquals("StrictLineBreakIterator next()", 11, it.next()); + assertEquals("StrictLineBreakIterator next()", BreakIterator.DONE, + it.next()); + assertEquals("StrictLineBreakIterator current()", 11, it.current()); + } + + @Test + public void testBreakingWithOddLFs() { + final 
String DOC = "abc\n\rdef\r\nghi"; + StrictLineBreakIterator it = new StrictLineBreakIterator(); + it.setText(DOC); + + assertEquals("StrictLineBreakIterator next()", 4, it.next()); + assertEquals("StrictLineBreakIterator next()", 5, it.next()); + assertEquals("StrictLineBreakIterator next()", 10, it.next()); + assertEquals("StrictLineBreakIterator next()", 13, it.next()); + assertEquals("StrictLineBreakIterator next()", BreakIterator.DONE, + it.next()); + } + + @Test + public void testTraversal() { + final String DOC = "abc\ndef\nghi"; + StrictLineBreakIterator it = new StrictLineBreakIterator(); + it.setText(DOC); + + assertEquals("StrictLineBreakIterator next()", 4, it.next()); + assertEquals("StrictLineBreakIterator next()", 8, it.next()); + assertEquals("StrictLineBreakIterator previous()", 4, it.previous()); + assertEquals("StrictLineBreakIterator previous()", 0, it.previous()); + assertEquals("StrictLineBreakIterator previous()", BreakIterator.DONE, + it.previous()); + assertEquals("StrictLineBreakIterator next()", 4, it.next()); + assertEquals("StrictLineBreakIterator next()", 8, it.next()); + assertEquals("StrictLineBreakIterator next()", 11, it.next()); + assertEquals("StrictLineBreakIterator next()", BreakIterator.DONE, + it.next()); + + assertEquals("StrictLineBreakIterator first()", 0, it.first()); + assertEquals("StrictLineBreakIterator next()", 4, it.next()); + + assertEquals("StrictLineBreakIterator last()", 11, it.last()); + assertEquals("StrictLineBreakIterator previous()", 8, it.previous()); + } + + @Test + public void testForEmptyString() { + final String DOC = ""; + StrictLineBreakIterator it = new StrictLineBreakIterator(); + it.setText(DOC); + + assertEquals("StrictLineBreakIterator next()", BreakIterator.DONE, + it.next()); + assertEquals("StrictLineBreakIterator first()", 0, it.first()); + assertEquals("StrictLineBreakIterator last()", DOC.length(), it.last()); + assertEquals("StrictLineBreakIterator previous()", BreakIterator.DONE, + 
it.previous()); + } +} diff --git a/web/more.jsp b/web/more.jsp index 65baa569570..3791819738a 100644 --- a/web/more.jsp +++ b/web/more.jsp @@ -19,8 +19,8 @@ information: Portions Copyright [yyyy] [name of copyright owner] CDDL HEADER END Copyright (c) 2010, 2017, Oracle and/or its affiliates. All rights reserved. - Portions Copyright 2011 Jens Elkner. +Portions Copyright (c) 2018, Chris Fraire . --%><%@page errorPage="error.jsp" import=" java.io.FileInputStream, @@ -29,11 +29,14 @@ java.nio.charset.StandardCharsets, java.util.logging.Level, java.util.logging.Logger, +org.apache.lucene.search.IndexSearcher, org.apache.lucene.search.Query, +org.opensolaris.opengrok.configuration.RuntimeEnvironment, org.opensolaris.opengrok.search.QueryBuilder, org.opensolaris.opengrok.search.context.Context, org.opensolaris.opengrok.logger.LoggerFactory, -org.opensolaris.opengrok.util.IOUtils" +org.opensolaris.opengrok.util.IOUtils, +org.opensolaris.opengrok.web.SearchHelper" %> <% { @@ -48,21 +51,64 @@ file="mast.jsp" /* ---------------------- more.jsp start --------------------- */ { PageConfig cfg = PageConfig.get(request); - QueryBuilder qbuilder = cfg.getQueryBuilder(); + File resourceFile = cfg.getResourceFile(); + String path = cfg.getPath(); + RuntimeEnvironment env = cfg.getEnv(); + Project activeProject = Project.getProject(resourceFile); + + QueryBuilder qbuilder = null; + SearchHelper searchHelper = null; + int docId = -1; + int tabSize = 0; + + if (activeProject == null) { + qbuilder = cfg.getQueryBuilder(); + } else { + searchHelper = cfg.prepareInternalSearch(); + /* + * N.b. searchHelper.destroy() is called via + * WebappListener.requestDestroyed() on presence of the following + * REQUEST_ATTR. 
+ */ + request.setAttribute(SearchHelper.REQUEST_ATTR, searchHelper); + searchHelper.prepareExec(activeProject); + if (searchHelper.searcher != null) { + docId = searchHelper.searchSingle(resourceFile); + qbuilder = searchHelper.builder; + searchHelper.prepareSummary(); + tabSize = searchHelper.getTabSize(activeProject); + } + } try { Query tquery = qbuilder.build(); if (tquery != null) { - Context sourceContext = new Context(tquery, qbuilder.getQueries()); %>

Lines Matching <%= tquery %>

<%
-            // SRCROOT is read with UTF-8 as a default.
-            Reader r = IOUtils.createBOMStrippedReader(new FileInputStream(
-                cfg.getResourceFile()), StandardCharsets.UTF_8.name());
-            sourceContext.getContext(r, out,
-                request.getContextPath() + Prefix.XREF_P, null, cfg.getPath(),
-                null, false, false, null, null);
+            String xrefPrefix = request.getContextPath() + Prefix.XREF_P;
+            boolean didPresentNew = false;
+            if (docId >= 0) {
+                didPresentNew = searchHelper.sourceContext.getContext2(env,
+                    searchHelper.searcher, docId, out, xrefPrefix, null, false,
+                    tabSize);
+            }
+            if (!didPresentNew) {
+                /**
+                 * Fall back to the old view, which re-analyzes text using
+                 * PlainLineTokenizer. E.g., when source code is updated (thus
+                 * affecting timestamps) but re-indexing is not yet complete.
+                 */
+                Context sourceContext = new Context(tquery, qbuilder);
+                sourceContext.toggleAlt();
+                // SRCROOT is read with UTF-8 as a default.
+                try (Reader r = IOUtils.createBOMStrippedReader(
+                        new FileInputStream(resourceFile),
+                        StandardCharsets.UTF_8.name())) {
+                    sourceContext.getContext(r, out, xrefPrefix, null, path,
+                        null, false, false, null, null);
+                }
+            }
     %>
<% }