[LIB-8] URL and HTML tag processing, new entities fields

[snooker-score-api.git] / src / main / java / org / hedgecode / snooker / SnookerURLUtils.java
diff --git a/src/main/java/org/hedgecode/snooker/SnookerURLUtils.java b/src/main/java/org/hedgecode/snooker/SnookerURLUtils.java

new file mode 100644 (file)

index 0000000..c345d78
--- /dev/null
+++ b/src/main/java/org/hedgecode/snooker/SnookerURLUtils.java
@@ -0,0 +1,150 @@
+/*
+ * Copyright (c) 2017-2019. Developed by Hedgecode.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.hedgecode.snooker;
+
+import java.awt.image.BufferedImage;
+import java.io.IOException;
+import java.net.URL;
+import java.util.ArrayList;
+import java.util.List;
+import java.util.Map;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
+
+import javax.imageio.ImageIO;
+
+import org.hedgecode.snooker.api.APIException;
+import org.hedgecode.snooker.api.SnookerURL;
+
+/**
+ * Utils for working with URLs.
+ *
+ * @author Dmitry Samoshin aka gotty
+ */
+public final class SnookerURLUtils {
+
+    private static final String CRLF = System.lineSeparator();
+
+    private static final String HTTP_REGEX = "(http[s]?://.+)";
+    private static final String ANCHOR_REGEX = "<a href=\"([^\"]+)\"[^>]*>([^<]+)</a>";
+
+    private static final Pattern HTTP_PATTERN = Pattern.compile(HTTP_REGEX);
+    private static final Pattern ANCHOR_PATTERN = Pattern.compile(ANCHOR_REGEX);
+
+    private static final String BR_REGEX = "<[Bb][Rr][ /]*>";
+    private static final String TAG_REGEX = "<[^>]+>";
+
+    private static final String TWITTER_URL = "https://twitter.com/";
+    private static final String TWITTER_HASHTAG = "hashtag/";
+
+    public static List<SnookerURL> parseUrls(Map<String, String> htmlStrings) throws APIException {
+        List<SnookerURL> result = new ArrayList<>();
+        htmlStrings.forEach( (name, htmlString) -> {
+            Matcher matcher = ANCHOR_PATTERN.matcher(htmlString);
+            while (matcher.find()) {
+                try {
+                    URL url = new URL(matcher.group(1));
+                    String text = matcher.group(2);
+                    result.add(
+                            new SnookerURL(text, url)
+                    );
+                } catch (IOException ignored) {
+                }
+            }
+        });
+        return result;
+    }
+
+    public static List<SnookerURL> assignUrls(Map<String, String> urlStrings) throws APIException {
+        List<SnookerURL> result = new ArrayList<>();
+        urlStrings.forEach( (name, urlString) -> {
+            Matcher matcher = HTTP_PATTERN.matcher(urlString);
+            if (matcher.find()) {
+                try {
+                    URL url = new URL(matcher.group(1));
+                    result.add(
+                            new SnookerURL(name, url)
+                    );
+                } catch (IOException ignored) {
+                }
+            }
+        });
+        return result;
+    }
+
+    public static SnookerURL assignUrl(String name, String urlString) throws APIException {
+        SnookerURL result = null;
+        URL url = assignUrl(urlString);
+        if (url != null) {
+            result = new SnookerURL(name, url);
+        }
+        return result;
+    }
+
+    public static URL assignUrl(String urlString) throws APIException {
+        URL result = null;
+        if (urlString != null && !urlString.isEmpty()) {
+            Matcher matcher = HTTP_PATTERN.matcher(urlString);
+            if (matcher.find()) {
+                try {
+                    result = new URL(matcher.group(1));
+                } catch (IOException e) {
+                    throw new APIException(
+                            APIException.Type.INFO, "Failed to recognize URL: " + e.getMessage()
+                    );
+                }
+            }
+        }
+        return result;
+    }
+
+    public static String cutTags(String htmlString) {
+        if (htmlString != null && !htmlString.isEmpty()) {
+            return htmlString.replaceAll(
+                    BR_REGEX, CRLF
+            ).replaceAll(
+                    TAG_REGEX, ""
+            );
+        }
+        return htmlString;
+    }
+
+    public static BufferedImage loadImage(URL imageUrl) throws APIException {
+        BufferedImage result;
+        try {
+            result = ImageIO.read(imageUrl);
+        } catch (IOException e) {
+            throw new APIException(
+                    APIException.Type.INFO, "Failed to load image at the address: " + imageUrl
+            );
+        }
+        return result;
+    }
+
+    public static String twitterUrl(String twitterName) {
+        return twitterName != null && !twitterName.isEmpty()
+                ? TWITTER_URL.concat(twitterName)
+                : null;
+    }
+
+    public static String hashtagUrl(String twitterHashtag) {
+        return twitterHashtag != null && !twitterHashtag.isEmpty()
+                ? TWITTER_URL.concat(TWITTER_HASHTAG).concat(twitterHashtag)
+                : null;
+    }
+
+}