<chessHogVersion>0.1-SNAPSHOT</chessHogVersion>
<httpCoreVersion>4.4.11</httpCoreVersion>
<httpClientVersion>4.5.9</httpClientVersion>
+ <commonsTextVersion>1.8</commonsTextVersion>
<gsonVersion>2.8.0</gsonVersion>
<junitVersion>4.12</junitVersion>
<commonsConfigVersion>1.10</commonsConfigVersion>
<version>${httpClientVersion}</version>
</dependency>
<dependency>
+ <groupId>org.apache.commons</groupId>
+ <artifactId>commons-text</artifactId>
+ <version>${commonsTextVersion}</version>
+ </dependency>
+ <dependency>
<groupId>com.google.code.gson</groupId>
<artifactId>gson</artifactId>
<version>${gsonVersion}</version>
public static final String DOMAIN_CHESS24 = "chess24.com";
public static final String DOMAIN_CHESSCOM = "chess.com";
+ public static final String PGN_DETECT_REGEX = "^\\[Event \"[^\"]+\"\\]$";
+
public static final String PROXY_UNDEFINED = "undefined";
public static final String PROXY_HTTP = "http";
public static final String PROXY_SOCKS = "socks";
String getTournamentGamesUrlRegex();
+ String getTournamentJsonUrlRegex();
+
+ String getTournamentNameRegex();
+
String getTournamentQuery();
String getTournamentQueryUrlRegex();
String getGameUrlRegex();
+ String getGameJsonUrlRegex();
+
+ String getGameIdRegex();
+
}
@SerializedName("tournamentGamesUrlRegex")
private String tournamentGamesUrlRegex;
+ @SerializedName("tournamentJsonUrlRegex")
+ private String tournamentJsonUrlRegex;
+
+ @SerializedName("tournamentNameRegex")
+ private String tournamentNameRegex;
+
@SerializedName("tournamentQuery")
private String tournamentQuery;
@SerializedName("gameUrlRegex")
private String gameUrlRegex;
+ @SerializedName("gameJsonUrlRegex")
+ private String gameJsonUrlRegex;
+
+ @SerializedName("gameIdRegex")
+ private String gameIdRegex;
+
@Override
public String getTournamentUrl() {
return tournamentUrl;
}
+ /**
+ * Returns the value of the {@code tournamentJsonUrlRegex} settings field.
+ */
@Override
+ public String getTournamentJsonUrlRegex() {
+ return this.tournamentJsonUrlRegex;
+ }
+
+ /**
+ * Returns the value of the {@code tournamentNameRegex} settings field.
+ */
+ @Override
+ public String getTournamentNameRegex() {
+ return this.tournamentNameRegex;
+ }
+
+ @Override
public String getTournamentQuery() {
return tournamentQuery;
}
return gameUrlRegex;
}
+ /**
+ * Returns the value of the {@code gameJsonUrlRegex} settings field.
+ */
+ @Override
+ public String getGameJsonUrlRegex() {
+ return this.gameJsonUrlRegex;
+ }
+
+ /**
+ * Returns the value of the {@code gameIdRegex} settings field.
+ */
+ @Override
+ public String getGameIdRegex() {
+ return this.gameIdRegex;
+ }
+
}
package org.hedgecode.chess.scanner.portal;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
+
import org.hedgecode.chess.scanner.ChessHogScannerException;
import org.hedgecode.chess.scanner.Initiable;
import org.hedgecode.chess.scanner.Settings;
import org.hedgecode.chess.scanner.regex.RegexParams;
import org.hedgecode.chess.scanner.spi.ServiceRegistry;
+import static org.hedgecode.chess.scanner.ChessHogScannerConstants.*;
import static org.hedgecode.chess.scanner.regex.RegexBuilder.Type;
/**
);
}
+ /**
+ * Searches {@code source} for the first match of {@code regex}, compiled in
+ * {@link Pattern#MULTILINE} mode.
+ *
+ * @param source text to search; may be {@code null}
+ * @param regex regular expression to look for
+ * @return the first capturing group when the expression declares at least one
+ * (this may itself be {@code null} if that group did not take part in the match),
+ * otherwise the whole match; {@code null} when {@code source} is {@code null}
+ * or nothing matches
+ */
+ protected String regex(String source, String regex) {
+ // A missing source is reported as "no match" so that callers receive their
+ // usual null result instead of a NullPointerException from Pattern.matcher().
+ if (source == null) {
+ return null;
+ }
+ Matcher matcher = Pattern.compile(regex, Pattern.MULTILINE).matcher(source);
+ if (matcher.find()) {
+ return matcher.groupCount() > 0 ? matcher.group(1) : matcher.group();
+ }
+ return null;
+ }
+
+ protected boolean isPgnFormat(String source) {
+ return regex(source, PGN_DETECT_REGEX) != null;
+ }
+
private String assignUrlWithParams(String url, String params) {
return params != null
? url.concat(params)
public PGNTournament scanTournament(String tournamentId) throws ChessHogScannerException {
String decodeTournament = decodeUrlByRegex(
assignUrl(tournamentId, null),
- getSettings().getTournamentGamesUrlRegex()
+ getSettings().getTournamentJsonUrlRegex()
);
TournamentFormat tournamentFormat = Format.formatTournament(decodeTournament);
+ /**
+ * Scans a single game: builds the game URL from both identifiers via
+ * {@code assignUrl} and passes it to {@code scanGameByRegex} together with
+ * the {@code gameJsonUrlRegex} setting.
+ */
@Override
public PGNGame scanGame(String gameId, String tournamentId) throws ChessHogScannerException {
- String decodeGame = decodeUrlByRegex(
+ return scanGameByRegex(
assignUrl(gameId, tournamentId, true),
- getSettings().getGameUrlRegex()
+ getSettings().getGameJsonUrlRegex()
+ );
+ }
+
+ /**
+ * Scans a game addressed directly by its URL, using the
+ * {@code gameJsonUrlRegex} setting to locate the game data.
+ */
+ @Override
+ public PGNGame scanUrl(String gameUrl) throws ChessHogScannerException {
+ final String jsonRegex = getSettings().getGameJsonUrlRegex();
+ return scanGameByRegex(gameUrl, jsonRegex);
+ }
+
+ private PGNGame scanGameByRegex(String gameUrl, String regex) throws ChessHogScannerException {
+ String decodeGame = decodeUrlByRegex(
+ gameUrl,
+ regex
);
GameFormat gameFormat = Format.formatGame(decodeGame);
);
}
- @Override
- public PGNGame scanUrl(String gameUrl) throws ChessHogScannerException {
- return null;
- }
private String decodeUrlByRegex(String url, String regex) throws ChessHogScannerException {
String encodeString = match(
url,
regex
);
+ if (encodeString == null) {
+ throw new ChessHogScannerException(
+ String.format("Failed to decode source data for requesting URL: %s", url)
+ );
+ }
return new String(
- Base64.getDecoder().decode(encodeString)
+ Base64.getDecoder().decode(
+ encodeString
+ )
);
}
@Override
public PGNTournament findTournament(String tournamentName) throws ChessHogScannerException {
+ PGNTournament tournament = null;
Map<String, String> result = matchMap(
assignUrl(
tournamentName, true
getSettings().getTournamentQueryUrlRegex(),
true
);
- PGNTournament tournament = null;
for (Map.Entry<String, String> entry : result.entrySet()) {
if (entry.getValue().contains(tournamentName)) { // todo: contains?
tournament = new PGNTournament(
String pgn = request(
assignUrl(gameId)
);
+ if (!isPgnFormat(pgn)) {
+ throw new ChessHogScannerException(
+ String.format("Failed to get PGN for requesting game ID: %s", gameId)
+ );
+ }
return new PGNGame(
gameId, pgn
);
@Override
public PGNGame scanUrl(String gameUrl) throws ChessHogScannerException {
- return null;
+ String pgn = regex(
+ request(
+ gameUrl
+ ),
+ getSettings().getGameUrlRegex()
+ );
+ if (pgn == null || !isPgnFormat(pgn)) {
+ throw new ChessHogScannerException(
+ String.format("Failed to get PGN for requesting URL: %s", gameUrl)
+ );
+ }
+ return new PGNGame(
+ null,
+ pgn
+ );
}
private void assignTournamentGames(PGNTournament tournament) throws ChessHogScannerException {
package org.hedgecode.chess.scanner.portal;
import java.util.List;
-import java.util.regex.Matcher;
-import java.util.regex.Pattern;
+
+import org.apache.commons.text.StringEscapeUtils;
import org.hedgecode.chess.scanner.ChessHogScannerException;
import org.hedgecode.chess.scanner.entity.PGNGame;
import org.hedgecode.chess.scanner.entity.PGNTournament;
+import org.hedgecode.chess.scanner.format.lichess.Format;
+import org.hedgecode.chess.scanner.format.lichess.GameFormat;
+
+import static org.hedgecode.chess.scanner.ChessHogScannerConstants.*;
/**
* LiChessScanner
private static final String SETTINGS_FILENAME = "lichess.settings";
- private static final String TOURNAMENT_GAMES_SPLIT_REGEX = "\\[Event \"[^\"]+\"\\]";
- private static final String TOURNAMENT_NAME_REGEX = "\\[Event \"([^\"]+)\"\\]";
- private static final String GAME_ID_REGEX = "\\[Site \"https://lichess.org/([^\"]+)\"\\]";
-
@Override
protected String getResourceName() {
return SETTINGS_FILENAME;
String pgn = request(
assignUrl(gameId)
);
+ if (!isPgnFormat(pgn)) {
+ throw new ChessHogScannerException(
+ String.format("Failed to get PGN for requesting game ID: %s", gameId)
+ );
+ }
return new PGNGame(
gameId, pgn
);
@Override
public PGNGame scanUrl(String gameUrl) throws ChessHogScannerException {
- return null;
+ String gamePage = request(gameUrl);
+ String pgn = regex(
+ gamePage,
+ getSettings().getGameUrlRegex()
+ );
+ if (pgn == null) {
+ pgn = regex(
+ gamePage,
+ getSettings().getGameJsonUrlRegex()
+ );
+ if (pgn == null) {
+ throw new ChessHogScannerException(
+ String.format("Failed to get source data for requesting URL: %s", gameUrl)
+ );
+ }
+ GameFormat gameFormat = Format.formatGame(pgn);
+ return new PGNGame(
+ gameFormat.id(),
+ gameFormat.pgn()
+ );
+ } else {
+ pgn = StringEscapeUtils.unescapeHtml4(pgn);
+ if (!isPgnFormat(pgn)) {
+ throw new ChessHogScannerException(
+ String.format("Failed to get PGN for requesting URL: %s", gameUrl)
+ );
+ }
+ return new PGNGame(
+ regex(pgn, getSettings().getGameIdRegex()),
+ pgn
+ );
+ }
}
private void assignTournamentGames(PGNTournament tournament) throws ChessHogScannerException {
tournament.id(),
null
),
- TOURNAMENT_GAMES_SPLIT_REGEX
+ PGN_DETECT_REGEX
);
if (!pgnGames.isEmpty()) {
tournament.setName(
- find(TOURNAMENT_NAME_REGEX, pgnGames.get(0))
+ regex(
+ pgnGames.get(0),
+ getSettings().getTournamentNameRegex()
+ )
);
}
for (String pgn : pgnGames) {
- String gameId = find(GAME_ID_REGEX, pgn);
+ String gameId = regex(
+ pgn,
+ getSettings().getGameIdRegex()
+ );
tournament.addGame(
new PGNGame(gameId, pgn)
);
}
}
- private String find(String regex, String pgn) {
- Pattern pattern = Pattern.compile(regex, Pattern.MULTILINE);
- Matcher matcher = pattern.matcher(pgn);
- if (matcher.find()) {
- return matcher.group(1);
- }
- return null;
- }
-
}
{
"tournamentUrl": "https://www.chessbomb.com/arena/[tournamentId]",
"tournamentIsMultiPage": false,
- "tournamentGamesUrlRegex": "cbConfigData=\"([^\"]+)\"",
+ "tournamentJsonUrlRegex": "cbConfigData=\"([^\"]+)\"",
"tournamentQuery": "https://www.chessbomb.com/arena/",
"tournamentQueryUrlRegex": "cbConfigData=\"([^\"]+)\"",
"gameUrl": "https://www.chessbomb.com/arena/[tournamentId]/[gameId]",
- "gameUrlRegex": "cbConfigData=\"([^\"]+)\""
+ "gameJsonUrlRegex": "cbConfigData=\"([^\"]+)\""
}
\ No newline at end of file
"tournamentQuery": "https://www.chessgames.com/perl/tournaments?query=[query]",
"tournamentQueryUrlRegex": "<a href=\"/perl/chess.pl\\?tid=([0-9]+)\">([^<]+)</a>",
"gameUrl": "https://www.chessgames.com/perl/chessgame?gid=[gameId]",
- "gamePgnUrl": "https://www.chessgames.com/perl/nph-chesspgn?gid=[gameId]&text=1"
+ "gamePgnUrl": "https://www.chessgames.com/perl/nph-chesspgn?gid=[gameId]&text=1",
+ "gameUrlRegex": "pgn='([^']+)'"
}
\ No newline at end of file
"tournamentUrl": "https://lichess.org/api/tournament/[tournamentId]/games",
"tournamentIsMultiPage": false,
"tournamentQueryParams": "?clocks=false&evals=false&opening=true",
+ "tournamentNameRegex": "\\[Event \"([^\"]+)\"\\]",
"gameUrl": "https://lichess.org/[gameId]",
"gamePgnUrl": "https://lichess.org/game/export/[gameId]",
- "gameQueryParams": "?clocks=false&evals=false&literate=true"
+ "gameQueryParams": "?clocks=false&evals=false&literate=true",
+ "gameUrlRegex": "<div class=\"pgn\">([^<]+)</div>",
+ "gameJsonUrlRegex": "<script[^>]+>.*=(\\{.*\"data\":\\{\"game\"[^<]+})</script>",
+ "gameIdRegex" : "\\[Site \"https://lichess.org/([^\"]+)\"\\]"
}
\ No newline at end of file