From 9cfd5f43a9d43b931ea51aaba514e25ee50eedce Mon Sep 17 00:00:00 2001 From: gotty Date: Tue, 14 Jan 2020 00:53:56 +0300 Subject: [PATCH] [LIB-13] Modify RegexMatcher for searching by type --- .../chess/scanner/regex/AbstractRegexResult.java | 57 ++++++++ .../chess/scanner/regex/RegexBuilder.java | 66 +++++++++- .../chess/scanner/regex/RegexMatcher.java | 2 - .../chess/scanner/regex/RegexMatcherResult.java | 146 --------------------- .../hedgecode/chess/scanner/regex/RegexType.java | 76 +++++------ .../chess/scanner/regex/RegexTypeMatcher.java | 47 +++++++ .../chess/scanner/regex/type/RegexBlockFinder.java | 82 ++++++++++++ .../chess/scanner/regex/type/RegexFinder.java | 56 ++++++++ .../chess/scanner/regex/type/RegexSplitter.java | 94 +++++++++++++ 9 files changed, 437 insertions(+), 189 deletions(-) create mode 100644 src/main/java/org/hedgecode/chess/scanner/regex/AbstractRegexResult.java delete mode 100644 src/main/java/org/hedgecode/chess/scanner/regex/RegexMatcherResult.java create mode 100644 src/main/java/org/hedgecode/chess/scanner/regex/RegexTypeMatcher.java create mode 100644 src/main/java/org/hedgecode/chess/scanner/regex/type/RegexBlockFinder.java create mode 100644 src/main/java/org/hedgecode/chess/scanner/regex/type/RegexFinder.java create mode 100644 src/main/java/org/hedgecode/chess/scanner/regex/type/RegexSplitter.java diff --git a/src/main/java/org/hedgecode/chess/scanner/regex/AbstractRegexResult.java b/src/main/java/org/hedgecode/chess/scanner/regex/AbstractRegexResult.java new file mode 100644 index 0000000..fefa6c6 --- /dev/null +++ b/src/main/java/org/hedgecode/chess/scanner/regex/AbstractRegexResult.java @@ -0,0 +1,57 @@ +/* + * Copyright (c) 2019-2020. Developed by Hedgecode. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.hedgecode.chess.scanner.regex; + +import java.util.ArrayList; +import java.util.HashMap; +import java.util.List; +import java.util.Map; + +/** + * AbstractRegexResult: base {@link RegexResult} holding unkeyed matches in a list and keyed matches in a map. + * + * @author Dmitry Samoshin aka gotty + */ +public abstract class AbstractRegexResult implements RegexResult { + + private final List<String> resultList = new ArrayList<>(); /* unkeyed matches, in the order they were added */ + private final Map<String, String> resultMap = new HashMap<>(); /* keyed matches; adding an existing key replaces its value */ + + protected void add(String value) { /* appends an unkeyed match */ + resultList.add(value); + } + + protected void add(String key, String value) { /* stores a keyed match */ + resultMap.put(key, value); + } + + @Override + public boolean isEmpty() { /* true only when neither collection holds a match */ + return resultMap.isEmpty() && resultList.isEmpty(); + } + + @Override + public List<String> resultList() { /* returns the live internal list, not a copy */ + return resultList; + } + + @Override + public Map<String, String> resultMap() { /* returns the live internal map, not a copy */ + return resultMap; + } + +} diff --git a/src/main/java/org/hedgecode/chess/scanner/regex/RegexBuilder.java b/src/main/java/org/hedgecode/chess/scanner/regex/RegexBuilder.java index 789882f..14b4fba 100644 --- a/src/main/java/org/hedgecode/chess/scanner/regex/RegexBuilder.java +++ b/src/main/java/org/hedgecode/chess/scanner/regex/RegexBuilder.java @@ -16,6 +16,11 @@ package org.hedgecode.chess.scanner.regex; +import java.io.UnsupportedEncodingException; +import java.net.URLEncoder; + +import org.hedgecode.chess.scanner.ChessHogScannerConstants; + /** * RegexBuilder * @@ -23,11 +28,70 @@ package org.hedgecode.chess.scanner.regex; */ public class RegexBuilder { - public static String build(RegexType type, String target, RegexParams params) { + public static String build(Type type, String target, RegexParams 
params) { return type.format( target, params ); } + public enum Type { /* substitution strategy; each constant fills in its own RegexParams constant(s) inside the target string */ + + TOURNAMENT { /* replaces RegexParams.PAGE_ID (only when a page id is set) and then RegexParams.TOURNAMENT_ID */ + @Override + public String format(String target, RegexParams params) { + if (params.getPageId() != null) { + target = target.replace( + RegexParams.PAGE_ID, + params.getPageId() + ); + } + return target.replace( + RegexParams.TOURNAMENT_ID, + params.getTournamentId() + ); + } + }, + + GAME { /* replaces RegexParams.GAME_ID with params.getGameId() */ + @Override + public String format(String target, RegexParams params) { + return target.replace( + RegexParams.GAME_ID, + params.getGameId() + ); + } + }, + + QUERY { /* replaces RegexParams.QUERY with the query text, URL-encoded when params.isUrlEncode() is true */ + @Override + public String format(String target, RegexParams params) { + return target.replace( + RegexParams.QUERY, + params.isUrlEncode() + ? urlEncode(params.getQuery()) + : params.getQuery() + ); + } + }; + + public abstract String format(String target, RegexParams params); /* returns target with this constant's substitutions applied */ + + private static String urlEncode(String query) throws RuntimeException { /* encodes query with URLEncoder using ChessHogScannerConstants.CHARSET */ + String encodeQuery; + try { + encodeQuery = URLEncoder.encode( + query, ChessHogScannerConstants.CHARSET.name() + ); + } catch (UnsupportedEncodingException cause) { /* rethrown unchecked, keeping the original exception as the cause */ + throw new RuntimeException( + String.format("Unsupported encoding: %s", ChessHogScannerConstants.CHARSET.name()), + cause + ); + } + return encodeQuery; + } + + } + } diff --git a/src/main/java/org/hedgecode/chess/scanner/regex/RegexMatcher.java b/src/main/java/org/hedgecode/chess/scanner/regex/RegexMatcher.java index 8d7a8e9..88060d4 100644 --- a/src/main/java/org/hedgecode/chess/scanner/regex/RegexMatcher.java +++ b/src/main/java/org/hedgecode/chess/scanner/regex/RegexMatcher.java @@ -25,8 +25,6 @@ public interface RegexMatcher { void match(String input); - boolean isBreak(); - RegexResult result(); } diff --git a/src/main/java/org/hedgecode/chess/scanner/regex/RegexMatcherResult.java b/src/main/java/org/hedgecode/chess/scanner/regex/RegexMatcherResult.java deleted file mode 100644 index 80bd66d..0000000 --- a/src/main/java/org/hedgecode/chess/scanner/regex/RegexMatcherResult.java +++ /dev/null @@ -1,146 +0,0 
@@ -/* - * Copyright (c) 2019-2020. Developed by Hedgecode. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.hedgecode.chess.scanner.regex; - -import java.util.ArrayList; -import java.util.HashMap; -import java.util.List; -import java.util.Map; -import java.util.regex.Matcher; -import java.util.regex.Pattern; - -/** - * RegexMatcherResult - * - * @author Dmitry Samoshin aka gotty - */ -public class RegexMatcherResult implements RegexMatcher, RegexResult { - - private Pattern pattern; - private Pattern startPattern, endPattern; - private final boolean isSingle, isFirst, isMap; - - private boolean isMatch = false; - private boolean isBreak = false; - - private int matchNumber = 0; - - private List resultList = new ArrayList<>(); - private Map resultMap = new HashMap<>(); - - public RegexMatcherResult(String match, boolean isFirst) { - this(match, isFirst, false); - } - - public RegexMatcherResult(String match, boolean isFirst, boolean isMap) { - this.pattern = Pattern.compile(match); - this.isSingle = true; - this.isFirst = isFirst; - this.isMap = isMap; - } - - public RegexMatcherResult(String startMatch, String endMatch, boolean isFirst) { - this(startMatch, endMatch, isFirst, false); - } - - public RegexMatcherResult(String startMatch, String endMatch, boolean isFirst, boolean isMap) { - this.startPattern = Pattern.compile(startMatch); - this.endPattern = Pattern.compile(endMatch); - this.isSingle = false; - this.isFirst = isFirst; - 
this.isMap = isMap; - } - - @Override - public void match(String input) { - Matcher matcher; - if (isSingle) { - matcher = pattern.matcher(input); - if (matcher.find()) { - addMatch(matcher); - if (isFirst) { - isBreak = true; - } - } - } else { - matcher = isMatch ? endPattern.matcher(input) : startPattern.matcher(input); - if (matcher.find()) { - add(input); - if (isMatch && isFirst) { - isBreak = true; - } - isMatch = !isMatch; - } else { - if (isMatch) { - add(input); - } - } - } - } - - private void add(String input) { - if (isMap) { - resultMap.put( - Integer.toString(matchNumber++), - input - ); - } else { - resultList.add( - input - ); - } - } - - private void addMatch(Matcher matcher) { - if (isMap) { - resultMap.put( - matcher.group(1), - matcher.group(2) - ); - } else { - resultList.add( - matcher.group(1) - ); - } - } - - @Override - public boolean isBreak() { - return isBreak; - } - - @Override - public RegexResult result() { - return this; - } - - @Override - public boolean isEmpty() { - return isMap ? 
resultMap.isEmpty() : resultList.isEmpty(); - } - - @Override - public List resultList() { - return resultList; - } - - @Override - public Map resultMap() { - return resultMap; - } - -} diff --git a/src/main/java/org/hedgecode/chess/scanner/regex/RegexType.java b/src/main/java/org/hedgecode/chess/scanner/regex/RegexType.java index c63af8c..cfe9c5f 100644 --- a/src/main/java/org/hedgecode/chess/scanner/regex/RegexType.java +++ b/src/main/java/org/hedgecode/chess/scanner/regex/RegexType.java @@ -16,10 +16,9 @@ package org.hedgecode.chess.scanner.regex; -import java.io.UnsupportedEncodingException; -import java.net.URLEncoder; - -import org.hedgecode.chess.scanner.ChessHogScannerConstants; +import org.hedgecode.chess.scanner.regex.type.RegexBlockFinder; +import org.hedgecode.chess.scanner.regex.type.RegexFinder; +import org.hedgecode.chess.scanner.regex.type.RegexSplitter; /** * RegexType @@ -28,48 +27,45 @@ import org.hedgecode.chess.scanner.ChessHogScannerConstants; */ public enum RegexType { - TOURNAMENT { - @Override - public String format(String target, RegexParams params) { - if (params.getPageId() != null) { - target = target.replace( - RegexParams.PAGE_ID, - params.getPageId() - ); + FIND, + SPLIT, + BLOCK; + + public static RegexMatcher matcher(RegexType type, String match) { /* matcher of the given type built from a single pattern */ + return Factory.create( + type, + match + ); + } + + public static RegexMatcher matcher(String startMatch, String endMatch) { /* block matcher with separate start and end patterns */ + return Factory.create( + startMatch, + endMatch + ); + } + + static class Factory { /* non-instantiable holder of the matcher creation logic */ + + static RegexMatcher create(RegexType type, String match) { + switch (type) { + case FIND: + return new RegexFinder(match); + case SPLIT: + return new RegexSplitter(match); + case BLOCK: + return new RegexBlockFinder(match, match); /* the same pattern is used as both start and end */ } - return target.replace( - RegexParams.TOURNAMENT_ID, - params.getTournamentId() - ); + throw new IllegalArgumentException("Unsupported regex type: " + type); /* fail here instead of handing callers a null matcher */ } - }, - GAME { - @Override - public String format(String target, RegexParams params) { - return target.replace( - RegexParams.GAME_ID, - 
params.getGameId() - ); + static RegexMatcher create(String startMatch, String endMatch) { + return new RegexBlockFinder(startMatch, endMatch); } - }, - QUERY { - @Override - public String format(String target, RegexParams params) { - try { - return target.replace( - RegexParams.QUERY, - params.isUrlEncode() - ? URLEncoder.encode(params.getQuery(), ChessHogScannerConstants.CHARSET.name()) - : params.getQuery() - ); - } catch (UnsupportedEncodingException e) { - return null; - } + private Factory() { } - }; - public abstract String format(String target, RegexParams params); + } } diff --git a/src/main/java/org/hedgecode/chess/scanner/regex/RegexTypeMatcher.java b/src/main/java/org/hedgecode/chess/scanner/regex/RegexTypeMatcher.java new file mode 100644 index 0000000..8647753 --- /dev/null +++ b/src/main/java/org/hedgecode/chess/scanner/regex/RegexTypeMatcher.java @@ -0,0 +1,47 @@ +/* + * Copyright (c) 2019-2020. Developed by Hedgecode. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.hedgecode.chess.scanner.regex; + +/** + * RegexTypeMatcher: {@link RegexMatcher} that forwards every call to the matcher obtained from {@link RegexType}. + * + * @author Dmitry Samoshin aka gotty + */ +public class RegexTypeMatcher implements RegexMatcher { + + private RegexMatcher matcher; /* delegate chosen in the constructor */ + + public RegexTypeMatcher(RegexType type, String match) { /* delegate for a single pattern of the given type */ + matcher = RegexType.matcher(type, match); + } + + public RegexTypeMatcher(String startMatch, String endMatch) { /* delegate for a start/end pattern pair */ + matcher = RegexType.matcher(startMatch, endMatch); + } + + + @Override + public void match(String input) { /* forwards input to the delegate */ + matcher.match(input); + } + + @Override + public RegexResult result() { /* returns the delegate's result */ + return matcher.result(); + } + +} diff --git a/src/main/java/org/hedgecode/chess/scanner/regex/type/RegexBlockFinder.java b/src/main/java/org/hedgecode/chess/scanner/regex/type/RegexBlockFinder.java new file mode 100644 index 0000000..a8951d8 --- /dev/null +++ b/src/main/java/org/hedgecode/chess/scanner/regex/type/RegexBlockFinder.java @@ -0,0 +1,82 @@ +/* + * Copyright (c) 2019-2020. Developed by Hedgecode. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.hedgecode.chess.scanner.regex.type; + +import java.util.regex.Matcher; +import java.util.regex.Pattern; + +import org.hedgecode.chess.scanner.ChessHogScannerConstants; +import org.hedgecode.chess.scanner.regex.AbstractRegexResult; +import org.hedgecode.chess.scanner.regex.RegexMatcher; +import org.hedgecode.chess.scanner.regex.RegexResult; + +/** + * RegexBlockFinder: collects the inputs from a start-pattern match through the next end-pattern match (both included) as one result. + * + * @author Dmitry Samoshin aka gotty + */ +public class RegexBlockFinder extends AbstractRegexResult implements RegexMatcher { + + private Pattern startPattern, endPattern; + private boolean isMatch; /* true while a block is open: a start match was seen and its end match was not */ + private StringBuilder current; /* inputs of the block being collected, each followed by ChessHogScannerConstants.CRLF */ + private String key; /* group 1 of the start match when the start pattern has groups, otherwise null */ + + public RegexBlockFinder(String startMatch, String endMatch) { + startPattern = Pattern.compile(startMatch); + endPattern = Pattern.compile(endMatch); + isMatch = false; + current = new StringBuilder(); + } + + @Override + public void match(String input) { /* each input is tested against one pattern only: start when no block is open, end otherwise */ + Matcher matcher = !isMatch ? startPattern.matcher(input) : endPattern.matcher(input); + if (matcher.find()) { + addCurrent(input); + if (isMatch) { /* end match: publish the block under its key, or unkeyed when the key is null */ + if (key != null) { + add(key, current.toString()); + } else { + add(current.toString()); + } + current = new StringBuilder(); + } else { /* start match: remember the key for this block */ + key = matcher.groupCount() > 0 ? matcher.group(1) : null; + } + isMatch = !isMatch; + } else { + if (isMatch) { /* non-matching input inside an open block is part of the block; outside a block it is dropped */ + addCurrent(input); + } + } + } + + private void addCurrent(String input) { /* appends input plus the CRLF constant to the current block */ + current.append( + input + ).append( + ChessHogScannerConstants.CRLF + ); + } + + @Override + public RegexResult result() { /* this object is its own result */ + return this; + } + +} diff --git a/src/main/java/org/hedgecode/chess/scanner/regex/type/RegexFinder.java b/src/main/java/org/hedgecode/chess/scanner/regex/type/RegexFinder.java new file mode 100644 index 0000000..9a1f897 --- /dev/null +++ b/src/main/java/org/hedgecode/chess/scanner/regex/type/RegexFinder.java @@ -0,0 +1,56 @@ +/* + * Copyright (c) 2019-2020. Developed by Hedgecode. 
+ * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.hedgecode.chess.scanner.regex.type; + +import java.util.regex.Matcher; +import java.util.regex.Pattern; + +import org.hedgecode.chess.scanner.regex.AbstractRegexResult; +import org.hedgecode.chess.scanner.regex.RegexMatcher; +import org.hedgecode.chess.scanner.regex.RegexResult; + +/** + * RegexFinder: searches each input for the pattern and records the captured text of the first match found in it. + * + * @author Dmitry Samoshin aka gotty + */ +public class RegexFinder extends AbstractRegexResult implements RegexMatcher { + + private final Pattern pattern; + + public RegexFinder(String match) { + pattern = Pattern.compile(match); + } + + @Override + public void match(String input) { /* inputs without a match are ignored */ + Matcher matcher = pattern.matcher(input); + if (matcher.find()) { + if (matcher.groupCount() > 1) { /* two or more groups: group 1 is the key, group 2 the value */ + add(matcher.group(1), matcher.group(2)); + } else { /* one group: record it; no groups: record the whole match, since group(1) would throw IndexOutOfBoundsException */ + add(matcher.groupCount() > 0 ? matcher.group(1) : matcher.group()); + } + } + } + + @Override + public RegexResult result() { /* this object is its own result */ + return this; + } + +} diff --git a/src/main/java/org/hedgecode/chess/scanner/regex/type/RegexSplitter.java b/src/main/java/org/hedgecode/chess/scanner/regex/type/RegexSplitter.java new file mode 100644 index 0000000..fe7b5a5 --- /dev/null +++ b/src/main/java/org/hedgecode/chess/scanner/regex/type/RegexSplitter.java @@ -0,0 +1,94 @@ +/* + * Copyright (c) 2019-2020. Developed by Hedgecode. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.hedgecode.chess.scanner.regex.type; + +import java.util.List; +import java.util.Map; +import java.util.regex.Matcher; +import java.util.regex.Pattern; + +import org.hedgecode.chess.scanner.ChessHogScannerConstants; +import org.hedgecode.chess.scanner.regex.AbstractRegexResult; +import org.hedgecode.chess.scanner.regex.RegexMatcher; +import org.hedgecode.chess.scanner.regex.RegexResult; + +/** + * RegexSplitter: splits the inputs into blocks, opening a new block at every input that matches the pattern; the last block is published only when resultList() or resultMap() is called. + * + * @author Dmitry Samoshin aka gotty + */ +public class RegexSplitter extends AbstractRegexResult implements RegexMatcher { + + private final Pattern pattern; + private StringBuilder current; /* inputs of the block being collected, each followed by ChessHogScannerConstants.CRLF */ + private String key; /* group 1 of the match that opened the current block, or null (no groups, or no match seen yet) */ + + public RegexSplitter(String match) { + pattern = Pattern.compile(match); + current = new StringBuilder(); + } + + @Override + public void match(String input) { + Matcher matcher = pattern.matcher(input); + if (matcher.find()) { + if (current.length() > 0) { /* a matching input closes the previous block, if any text was collected */ + if (key != null) { + add(key, current.toString()); + } else { + add(current.toString()); + } + current = new StringBuilder(); + } + /* always take the key from the match that opens the new block, so it is never stale or skipped */ + key = matcher.groupCount() > 0 ? 
matcher.group(1) : null; + } + addCurrent(input); /* every input, matching or not, belongs to the block now open */ + } + + private void addCurrent(String input) { /* appends input plus the CRLF constant to the current block */ + current.append( + input + ).append( + ChessHogScannerConstants.CRLF + ); + } + + @Override + public RegexResult result() { /* this object is its own result */ + return this; + } + + @Override + public List<String> resultList() { /* publishes a pending unkeyed block first; a keyed block is left for resultMap() */ + if (key == null && current.length() > 0) { + add(current.toString()); + current = new StringBuilder(); + } + return super.resultList(); + } + + @Override + public Map<String, String> resultMap() { /* publishes a pending keyed block first; an unkeyed block is left for resultList() */ + if (key != null && current.length() > 0) { + add(key, current.toString()); + current = new StringBuilder(); + } + return super.resultMap(); + } + +} -- 2.10.0