summary | refs | log | tree | commit | diff | stats
path: root/branches/sca-java-1.x/modules/domain-search/src/main/java/org/apache/tuscany/sca/domain/search/impl/DomainPathAnalyzer.java
diff options
context:
space:
mode:
Diffstat (limited to 'branches/sca-java-1.x/modules/domain-search/src/main/java/org/apache/tuscany/sca/domain/search/impl/DomainPathAnalyzer.java')
-rw-r--r-- branches/sca-java-1.x/modules/domain-search/src/main/java/org/apache/tuscany/sca/domain/search/impl/DomainPathAnalyzer.java | 289
1 files changed, 143 insertions, 146 deletions
diff --git a/branches/sca-java-1.x/modules/domain-search/src/main/java/org/apache/tuscany/sca/domain/search/impl/DomainPathAnalyzer.java b/branches/sca-java-1.x/modules/domain-search/src/main/java/org/apache/tuscany/sca/domain/search/impl/DomainPathAnalyzer.java
index c25e5937ee..c0b5bcb218 100644
--- a/branches/sca-java-1.x/modules/domain-search/src/main/java/org/apache/tuscany/sca/domain/search/impl/DomainPathAnalyzer.java
+++ b/branches/sca-java-1.x/modules/domain-search/src/main/java/org/apache/tuscany/sca/domain/search/impl/DomainPathAnalyzer.java
@@ -32,205 +32,202 @@ import org.apache.lucene.analysis.Tokenizer;
*/
public class DomainPathAnalyzer extends Analyzer {
- final public static char PATH_START = '\u0001';
-
- final public static char PATH_SEPARATOR = '\u0002';
+ final public static char PATH_START = '\u0001';
- final public static char TYPE_SEPARATOR = '\u0003';
+ final public static char PATH_SEPARATOR = '\u0002';
- final public static char URI_SEPARATOR = '\u0004';
-
- final public static char ARCHIVE_SEPARATOR = '!';
+ final public static char TYPE_SEPARATOR = '\u0003';
- static class DomainPathTokenizer extends Tokenizer {
+ final public static char URI_SEPARATOR = '\u0004';
- private int offset = 0, bufferIndex = 0, dataLen = 0;
- private static final int MAX_WORD_LEN = 1024;
- private static final int IO_BUFFER_SIZE = 4096;
- private final char[] ioBuffer = new char[IO_BUFFER_SIZE];
- private boolean typeCharFound = false;
- private boolean uriCharFound = false;
+ final public static char ARCHIVE_SEPARATOR = '!';
- public DomainPathTokenizer(Reader reader) {
- super(reader);
- }
+ static class DomainPathTokenizer extends Tokenizer {
- @Override
- public void reset() throws IOException {
- super.reset();
+ private int offset = 0, bufferIndex = 0, dataLen = 0;
+ private static final int MAX_WORD_LEN = 1024;
+ private static final int IO_BUFFER_SIZE = 4096;
+ private final char[] ioBuffer = new char[IO_BUFFER_SIZE];
+ private boolean typeCharFound = false;
+ private boolean uriCharFound = false;
- typeCharFound = false;
- uriCharFound = false;
+ public DomainPathTokenizer(Reader reader) {
+ super(reader);
+ }
- }
+ @Override
+ public void reset() throws IOException {
+ super.reset();
- @Override
- public void reset(Reader input) throws IOException {
- super.reset(input);
+ typeCharFound = false;
+ uriCharFound = false;
- uriCharFound = false;
- typeCharFound = false;
+ }
- }
+ @Override
+ public void reset(Reader input) throws IOException {
+ super.reset(input);
- @Override
- public Token next(Token reusableToken) throws IOException {
- assert reusableToken != null;
- reusableToken.clear();
- int length = 0;
- int start = bufferIndex;
- char[] buffer = reusableToken.termBuffer();
+ uriCharFound = false;
+ typeCharFound = false;
- boolean lowercaseCharFound = false;
- boolean digitFound = false;
+ }
- while (true) {
+ @Override
+ public Token next(Token reusableToken) throws IOException {
+ assert reusableToken != null;
+ reusableToken.clear();
+ int length = 0;
+ int start = bufferIndex;
+ char[] buffer = reusableToken.termBuffer();
- if (bufferIndex >= dataLen) {
- offset += dataLen;
- int incr;
+ boolean lowercaseCharFound = false;
+ boolean digitFound = false;
- if (lowercaseCharFound || length == 0) {
- incr = 0;
+ while (true) {
- } else {
- incr = 2;
- ioBuffer[0] = ioBuffer[bufferIndex - 1];
- ioBuffer[1] = ioBuffer[bufferIndex];
+ if (bufferIndex >= dataLen) {
+ offset += dataLen;
+ int incr;
- }
+ if (lowercaseCharFound || length == 0) {
+ incr = 0;
- dataLen = input
- .read(ioBuffer, incr, ioBuffer.length - incr);
- if (dataLen == -1) {
- if (length > 0)
- break;
- else
- return null;
- }
- bufferIndex = incr;
- dataLen += incr;
+ } else {
+ incr = 2;
+ ioBuffer[0] = ioBuffer[bufferIndex - 1];
+ ioBuffer[1] = ioBuffer[bufferIndex];
- }
+ }
- final char c = ioBuffer[bufferIndex++];
- boolean breakChar = true;
- boolean includeChar = false;
-
- if (c == PATH_START || c == PATH_SEPARATOR) {
+ dataLen = input.read(ioBuffer, incr, ioBuffer.length - incr);
+ if (dataLen == -1) {
+ if (length > 0)
+ break;
+ else
+ return null;
+ }
+ bufferIndex = incr;
+ dataLen += incr;
- if (length == 0) {
- includeChar = true;
+ }
- } else {
- bufferIndex--;
- }
+ final char c = ioBuffer[bufferIndex++];
+ boolean breakChar = true;
+ boolean includeChar = false;
- typeCharFound = false;
- uriCharFound = false;
+ if (c == PATH_START || c == PATH_SEPARATOR) {
- } else if (c == TYPE_SEPARATOR && !typeCharFound
- || c == URI_SEPARATOR && !uriCharFound) {
- length = 0;
- breakChar = false;
- lowercaseCharFound = false;
- digitFound = false;
+ if (length == 0) {
+ includeChar = true;
- } else {
+ } else {
+ bufferIndex--;
+ }
- if (Character.isDigit(c)) {
+ typeCharFound = false;
+ uriCharFound = false;
- if (digitFound || length == 0) {
- breakChar = false;
- digitFound = true;
+ } else if (c == TYPE_SEPARATOR && !typeCharFound || c == URI_SEPARATOR && !uriCharFound) {
+ length = 0;
+ breakChar = false;
+ lowercaseCharFound = false;
+ digitFound = false;
- } else {
- bufferIndex--;
- }
+ } else {
- // TODO: normalize accent, it does not index accents for
- // now
- } else if (c >= 65 && c <= 90 || c >= 97 && c <= 122) {
+ if (Character.isDigit(c)) {
- if (digitFound) {
- bufferIndex--;
+ if (digitFound || length == 0) {
+ breakChar = false;
+ digitFound = true;
- } else if (Character.isLowerCase(c)) {
+ } else {
+ bufferIndex--;
+ }
- if (!(lowercaseCharFound || length <= 1)) {
- length--;
- bufferIndex -= 2;
+ // TODO: normalize accent, it does not index accents for
+ // now
+ } else if (c >= 65 && c <= 90 || c >= 97 && c <= 122) {
- } else {
- lowercaseCharFound = true;
- breakChar = false;
+ if (digitFound) {
+ bufferIndex--;
- }
+ } else if (Character.isLowerCase(c)) {
- } else if (!lowercaseCharFound) { // && uppercase
- breakChar = false;
+ if (!(lowercaseCharFound || length <= 1)) {
+ length--;
+ bufferIndex -= 2;
- } else {
- bufferIndex--;
- }
+ } else {
+ lowercaseCharFound = true;
+ breakChar = false;
- }
+ }
- }
+ } else if (!lowercaseCharFound) { // && uppercase
+ breakChar = false;
- if (!breakChar || includeChar) {
+ } else {
+ bufferIndex--;
+ }
- if (length == 0) // start of token
- start = offset + bufferIndex - 1;
- else if (length == buffer.length)
- buffer = reusableToken.resizeTermBuffer(1 + length);
+ }
- if (c == TYPE_SEPARATOR && !typeCharFound) {
- typeCharFound = true;
+ }
- } else if (c == URI_SEPARATOR && !uriCharFound) {
- typeCharFound = true;
+ if (!breakChar || includeChar) {
- } else {
- buffer[length++] = Character.toLowerCase(c); // buffer
- // it,
- // normalized
- }
+ if (length == 0) // start of token
+ start = offset + bufferIndex - 1;
+ else if (length == buffer.length)
+ buffer = reusableToken.resizeTermBuffer(1 + length);
- if (length == MAX_WORD_LEN || (breakChar && length > 0)) // buffer
- // overflow!
- break;
+ if (c == TYPE_SEPARATOR && !typeCharFound) {
+ typeCharFound = true;
- } else if (length > 0) {// at non-Letter w/ chars
+ } else if (c == URI_SEPARATOR && !uriCharFound) {
+ typeCharFound = true;
- break; // return 'em
+ } else {
+ buffer[length++] = Character.toLowerCase(c); // buffer
+ // it,
+ // normalized
+ }
- }
+ if (length == MAX_WORD_LEN || (breakChar && length > 0)) // buffer
+ // overflow!
+ break;
- }
+ } else if (length > 0) {// at non-Letter w/ chars
- reusableToken.setTermLength(length);
- reusableToken.setStartOffset(start);
- reusableToken.setEndOffset(start + length);
+ break; // return 'em
- return reusableToken;
+ }
- }
- }
+ }
- public TokenStream tokenStream(String fieldName, Reader reader) {
- return new DomainPathTokenizer(reader);
- }
+ reusableToken.setTermLength(length);
+ reusableToken.setStartOffset(start);
+ reusableToken.setEndOffset(start + length);
- public TokenStream reusableTokenStream(String fieldName, Reader reader)
- throws IOException {
- Tokenizer tokenizer = (Tokenizer) getPreviousTokenStream();
- if (tokenizer == null) {
- tokenizer = new DomainPathTokenizer(reader);
- setPreviousTokenStream(tokenizer);
- } else
- tokenizer.reset(reader);
- return tokenizer;
- }
+ return reusableToken;
+
+ }
+ }
+
+ public TokenStream tokenStream(String fieldName, Reader reader) {
+ return new DomainPathTokenizer(reader);
+ }
+
+ public TokenStream reusableTokenStream(String fieldName, Reader reader) throws IOException {
+ Tokenizer tokenizer = (Tokenizer)getPreviousTokenStream();
+ if (tokenizer == null) {
+ tokenizer = new DomainPathTokenizer(reader);
+ setPreviousTokenStream(tokenizer);
+ } else
+ tokenizer.reset(reader);
+ return tokenizer;
+ }
}