1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85
| import org.jsoup.Jsoup; import org.jsoup.nodes.Document; import org.junit.Test;
import java.util.ArrayList; import java.util.List; import java.util.regex.Matcher; import java.util.regex.Pattern;
/**
 * Scratch tests for pulling highlighted text out of HTML snippets and for reading
 * single properties out of an inline CSS {@code style} string.
 *
 * <p>NOTE(review): this class has the same simple name as the imported
 * {@code org.junit.Test} annotation. The Java Language Specification (section 7.5.1)
 * makes a single-type import that clashes with a top-level type declared in the same
 * compilation unit a compile-time error, so the class should probably be renamed
 * (for example to {@code HtmlStyleTest}) together with its file.
 */
public class Test {

    /**
     * Matches one {@code <font ...>...</font>} element and captures its inner text in
     * group 1. The closing tag needs no escaping: {@code /} is not a regex metacharacter.
     */
    private static final Pattern FONT_TAG = Pattern.compile("<font[^>]+?>(.*?)</font>");

    /** Collects and prints the inner text of every {@code <font>} element in a title string. */
    @Test
    public void testRegex() {
        String input = "【丝路亚心】250gX4<font color=\"red\">核桃仁</font> 原味生<font color=\"red\">核桃核桃仁</font> 新疆特产, <font color=\"red\">123445</font>bcdef";
        Matcher matcher = FONT_TAG.matcher(input);
        List<String> output = new ArrayList<>();
        int count = 0;
        while (matcher.find()) {
            System.out.println("--" + count);
            // Group 1 is the only capture group; the running match counter is not a group index.
            output.add(matcher.group(1));
            count++;
        }
        System.out.println(output);
    }

    /** Parses an HTML fragment with jsoup and prints the document, its text and two inner HTML values. */
    @Test
    public void testHtml() {
        String html = "<span style=\"color:red;border-radius:10px;background-color:blue;\">抢购</span> 我是标题我是标题我是标题<font color=\"red\">高亮</font>我是标题我是标题我是标题我是标题<font color=\"red\">高亮</font>我是标题";
        Document doc = Jsoup.parse(html);
        System.out.println(doc);
        System.out.println("\n---->" + doc.text());
        System.out.println("\n---->" + doc.getElementsByTag("span").get(0).html());
        System.out.println("\n---->" + doc.getElementsByTag("font").get(0).html());
    }

    /** Prints the width, color and background-color read from a sample inline style. */
    @Test
    public void testStyle() {
        String style = "position: absolute; width: 500px; height: 552px; color: red; background-color: blue;";
        String width = "width";
        String color = "color";
        String backgroundColor = "background-color";
        System.out.println(getFloat(style, width));
        System.out.println(getString(style, color));
        System.out.println(getString(style, backgroundColor));
    }

    /**
     * Reads a numeric property from an inline style string.
     *
     * <p>The text before the first {@code px} in the property value is parsed as a double
     * (so {@code "1px solid red"} yields {@code 1.0}); a value without a {@code px} unit is
     * parsed as a whole.
     *
     * @param value    inline style text such as {@code "width: 500px; color: red"}; may be null
     * @param property exact property name to look up, e.g. {@code "width"}; may be null
     * @return the parsed number, or {@code 0} when either argument is null, the property is
     *         not declared, or its value is not numeric
     */
    public static double getFloat(String value, String property) {
        String raw = findDeclarationValue(value, property);
        if (raw == null) {
            return 0;
        }
        int unitAt = raw.indexOf("px");
        String number = unitAt >= 0 ? raw.substring(0, unitAt) : raw;
        try {
            return Double.parseDouble(number.trim());
        } catch (NumberFormatException ignored) {
            // Best-effort lookup: non-numeric values such as "auto" or "50%" report 0.
            return 0;
        }
    }

    /**
     * Reads a property from an inline style string as trimmed text.
     *
     * @param value    inline style text such as {@code "width: 500px; color: red"}; may be null
     * @param property exact property name to look up, e.g. {@code "color"}; may be null
     * @return the trimmed property value, or an empty string when either argument is null
     *         or the property is not declared
     */
    public static String getString(String value, String property) {
        String raw = findDeclarationValue(value, property);
        return raw == null ? "" : raw;
    }

    /**
     * Finds the first {@code name: value} declaration whose name equals {@code property}.
     *
     * <p>Declarations are separated on {@code ';'} and the name is compared as a whole, so
     * looking up {@code "color"} does not pick up {@code "background-color"}. Values that
     * themselves contain {@code ';'} (quoted strings, data URLs) are not supported.
     *
     * @return the trimmed value, or {@code null} when there is no such declaration
     */
    private static String findDeclarationValue(String style, String property) {
        if (style == null || property == null) {
            return null;
        }
        String wanted = property.trim();
        for (String declaration : style.split(";")) {
            int colon = declaration.indexOf(':');
            if (colon < 0) {
                continue; // empty or malformed segment, e.g. after a trailing ';'
            }
            if (declaration.substring(0, colon).trim().equals(wanted)) {
                return declaration.substring(colon + 1).trim();
            }
        }
        return null;
    }
}
|