From: gotty Date: Mon, 20 Jan 2020 05:53:44 +0000 (+0300) Subject: [LIB-13] Functional for working with strings moved to StringUtils X-Git-Url: https://git.hedgecode.org/?a=commitdiff_plain;h=6e3a8590a26312b6cea579777db885107cae88df;p=chesshog-scanner.git [LIB-13] Functional for working with strings moved to StringUtils --- diff --git a/src/main/java/org/hedgecode/chess/scanner/ScannerType.java b/src/main/java/org/hedgecode/chess/scanner/ScannerType.java index 49217ea..3b0b467 100644 --- a/src/main/java/org/hedgecode/chess/scanner/ScannerType.java +++ b/src/main/java/org/hedgecode/chess/scanner/ScannerType.java @@ -29,7 +29,8 @@ public enum ScannerType { LICHESS ( TYPE_LICHESS, DOMAIN_LICHESS ), CHESSBOMB ( TYPE_CHESSBOMB, DOMAIN_CHESSBOMB ), CHESS24 ( TYPE_CHESS24, DOMAIN_CHESS24 ), - CHESSCOM ( TYPE_CHESSCOM, DOMAIN_CHESSCOM ); + CHESSCOM ( TYPE_CHESSCOM, DOMAIN_CHESSCOM ), + CHESS2700 ( TYPE_2700CHESS, DOMAIN_2700CHESS ); private String type; private String domain; @@ -58,8 +59,9 @@ public enum ScannerType { public static ScannerType byHost(String host) { if (host != null) { for (ScannerType scannerType : ScannerType.values()) { - if (host.contains(scannerType.domain)) + if (StringUtils.belongDomain(scannerType.domain, host)) { return scannerType; + } } } return null; diff --git a/src/main/java/org/hedgecode/chess/scanner/StringUtils.java b/src/main/java/org/hedgecode/chess/scanner/StringUtils.java new file mode 100644 index 0000000..6a0f5c5 --- /dev/null +++ b/src/main/java/org/hedgecode/chess/scanner/StringUtils.java @@ -0,0 +1,106 @@ +/* + * Copyright (c) 2019-2020. Developed by Hedgecode. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.hedgecode.chess.scanner; + +import java.net.URL; +import java.util.regex.Matcher; +import java.util.regex.Pattern; + +import org.hedgecode.chess.scanner.format.PGNConstants; + +/** + * StringUtils + * + * @author Dmitry Samoshin aka gotty + */ +public final class StringUtils { + + private static final String EMPTY = ""; + private static final char BACKSLASH = '\\'; + + private static final String CRLF_REGEX = "(\\\\r)?\\\\n"; + private static final String SHIELD_REGEX = "\\\\"; + + private static final String DOMAIN_REGEX_FORMAT = "(\\w+\\.)*\\b%s"; + + public static String match(String source, String regex) { + Matcher matcher = Pattern.compile( + regex, + Pattern.MULTILINE + ).matcher(source); + if (matcher.find()) { + return matcher.groupCount() > 0 + ? matcher.group(1) + : matcher.group(); + } + return null; + } + + public static boolean isPgn(String source) { + return match( + source, + PGNConstants.PGN_DETECT_REGEX + ) != null; + } + + public static String shield(String source, char[] shields) { + for (char shield : shields) { + if (source.indexOf(shield) >= 0) { + String regexShield = + shield == BACKSLASH + ? SHIELD_REGEX + : String.valueOf(shield); + source = source.replaceAll( + String.format("([%s])", regexShield), + SHIELD_REGEX.concat("$1") + ); + } + } + return source; + } + + public static String unshield(String source) { + return source.replaceAll(SHIELD_REGEX, EMPTY); + } + + public static String formatCrlf(String source) { + return unshield( + source.replaceAll( + CRLF_REGEX, PGNConstants.PGN_CRLF + ) + ); + } + + public static boolean belongDomain(String domain, URL url) { + return belongDomain( + domain, url.getHost() + ); + } + + public static boolean belongDomain(String domain, String host) { + return host.matches( + String.format(DOMAIN_REGEX_FORMAT, domain) + ); + } + + private StringUtils() { + throw new AssertionError( + String.format("No %s instances!", getClass().getName()) + ); + } + +} diff --git a/src/main/java/org/hedgecode/chess/scanner/format/PGNTag.java b/src/main/java/org/hedgecode/chess/scanner/format/PGNTag.java index 10d6fec..74a066e 100644 --- a/src/main/java/org/hedgecode/chess/scanner/format/PGNTag.java +++ b/src/main/java/org/hedgecode/chess/scanner/format/PGNTag.java @@ -19,6 +19,8 @@ package org.hedgecode.chess.scanner.format; import java.util.Arrays; import java.util.Comparator; +import org.hedgecode.chess.scanner.StringUtils; + /** * PGNTag * @@ -64,6 +66,7 @@ public enum PGNTag { MODE ( "Mode", false, PGNTag.EMPTY ), PLY_COUNT ( "PlyCount", false, PGNTag.EMPTY ); + // todo: -> PGNConstants public static final String EMPTY = ""; public static final String HYPHEN = "-"; public static final String ZERO = "0"; @@ -74,6 +77,8 @@ public enum PGNTag { public static final String TAG_FORMAT = "[%s \"%s\"]"; + public static final char[] TAG_SHIELD_CHARS = { '\\', '"' }; + private String tagName; private boolean isRequired; private String defaultValue; @@ -96,6 +101,12 @@ public enum PGNTag { return defaultValue; } + public static String formatTagValue(String tagValue) { + return StringUtils.shield( + tagValue, TAG_SHIELD_CHARS + ); + } + public static PGNTag[] tags() { PGNTag[] tags = values(); Arrays.sort( diff --git a/src/main/java/org/hedgecode/chess/scanner/portal/ChessGamesScanner.java b/src/main/java/org/hedgecode/chess/scanner/portal/ChessGamesScanner.java index 247660a..56fc032 100644 --- a/src/main/java/org/hedgecode/chess/scanner/portal/ChessGamesScanner.java +++ b/src/main/java/org/hedgecode/chess/scanner/portal/ChessGamesScanner.java @@ -22,6 +22,7 @@ import java.util.Map; import org.hedgecode.chess.scanner.Scanner; import org.hedgecode.chess.scanner.ScannerException; +import org.hedgecode.chess.scanner.StringUtils; import org.hedgecode.chess.scanner.entity.PGNGame; import org.hedgecode.chess.scanner.entity.PGNTournament; @@ -75,7 +76,7 @@ public class ChessGamesScanner extends AbstractSettingsScanner implements Scanne String pgn = request( assignUrl(gameId) ); - if (!isPgnFormat(pgn)) { + if (!StringUtils.isPgn(pgn)) { throw new ScannerException( String.format("Failed to get PGN for requesting game ID: %s", gameId) ); @@ -92,13 +93,12 @@ public class ChessGamesScanner extends AbstractSettingsScanner implements Scanne @Override public PGNGame scanUrl(String gameUrl) throws ScannerException { - String pgn = regex( - request( - gameUrl - ), + String gamePage = request(gameUrl); + String pgn = StringUtils.match( + gamePage, getSettings().getGameUrlRegex() ); - if (pgn == null || !isPgnFormat(pgn)) { + if (pgn == null || !StringUtils.isPgn(pgn)) { throw new ScannerException( String.format("Failed to get PGN for requesting URL: %s", gameUrl) ); diff --git a/src/main/java/org/hedgecode/chess/scanner/portal/LiChessScanner.java b/src/main/java/org/hedgecode/chess/scanner/portal/LiChessScanner.java index 55a53e9..820e513 100644 --- a/src/main/java/org/hedgecode/chess/scanner/portal/LiChessScanner.java +++ b/src/main/java/org/hedgecode/chess/scanner/portal/LiChessScanner.java @@ -22,12 +22,13 @@ import org.apache.commons.text.StringEscapeUtils; import org.hedgecode.chess.scanner.Scanner; import org.hedgecode.chess.scanner.ScannerException; +import org.hedgecode.chess.scanner.StringUtils; import org.hedgecode.chess.scanner.entity.PGNGame; import org.hedgecode.chess.scanner.entity.PGNTournament; import org.hedgecode.chess.scanner.format.lichess.Format; import org.hedgecode.chess.scanner.format.lichess.GameFormat; -import static org.hedgecode.chess.scanner.ScannerConstants.*; +import static org.hedgecode.chess.scanner.format.PGNConstants.*; /** * LiChessScanner @@ -62,7 +63,7 @@ public class LiChessScanner extends AbstractSettingsScanner implements Scanner { String pgn = request( assignUrl(gameId) ); - if (!isPgnFormat(pgn)) { + if (!StringUtils.isPgn(pgn)) { throw new ScannerException( String.format("Failed to get PGN for requesting game ID: %s", gameId) ); @@ -80,12 +81,12 @@ public class LiChessScanner extends AbstractSettingsScanner implements Scanner { @Override public PGNGame scanUrl(String gameUrl) throws ScannerException { String gamePage = request(gameUrl); - String pgn = regex( + String pgn = StringUtils.match( gamePage, getSettings().getGameUrlRegex() ); if (pgn == null) { - pgn = regex( + pgn = StringUtils.match( gamePage, getSettings().getGameJsonUrlRegex() ); @@ -101,13 +102,16 @@ public class LiChessScanner extends AbstractSettingsScanner implements Scanner { ); } else { pgn = StringEscapeUtils.unescapeHtml4(pgn); - if (!isPgnFormat(pgn)) { + if (!StringUtils.isPgn(pgn)) { throw new ScannerException( String.format("Failed to get PGN for requesting URL: %s", gameUrl) ); } return new PGNGame( - regex(pgn, getSettings().getGameIdRegex()), + StringUtils.match( + pgn, + getSettings().getGameIdRegex() + ), pgn ); } @@ -125,7 +129,7 @@ public class LiChessScanner extends AbstractSettingsScanner implements Scanner { if (!pgnGames.isEmpty()) { tournament.setName( - regex( + StringUtils.match( pgnGames.get(0), getSettings().getTournamentNameRegex() ) @@ -133,7 +137,7 @@ public class LiChessScanner extends AbstractSettingsScanner implements Scanner { } for (String pgn : pgnGames) { - String gameId = regex( + String gameId = StringUtils.match( pgn, getSettings().getGameIdRegex() );