Skip to content
This repository has been archived by the owner on Oct 31, 2019. It is now read-only.
/ xquery Public archive

Extract data or evaluate value from HTML/XML documents using XPath

License

Notifications You must be signed in to change notification settings

antchfx/xquery

Folders and files

NameName
Last commit message
Last commit date

Latest commit

 

History

70 Commits
 
 
 
 
 
 
 
 
 
 
 
 
 
 

Repository files navigation

xquery

Build Status Coverage Status GoDoc Go Report Card

NOTE: This package is deprecated. Use the htmlquery and xmlquery packages instead; their latest versions fix a number of issues.

Overview

A Go package that lets you extract data from HTML/XML documents using XPath expressions.

The list of supported XPath functions can be found in the XPath package.

Installation

go get github.com/antchfx/xquery

HTML Query GoDoc

Extract data from an HTML document.

package main

import (
	"fmt"
	"os"

	"github.com/antchfx/xpath"
	htmlquery "github.com/antchfx/xquery/html"
)

// main parses ./examples/test.html and demonstrates two ways of querying it:
// evaluating a compiled XPath expression against a navigator, and calling the
// htmlquery.Find helper directly. It panics if the file cannot be opened or
// parsed.
func main() {
	// Load HTML file.
	f, err := os.Open(`./examples/test.html`)
	if err != nil {
		panic(err)
	}
	// Release the file handle once the example finishes.
	defer f.Close()

	// Parse HTML document.
	doc, err := htmlquery.Parse(f)
	if err != nil {
		panic(err)
	}

	// Option 1: using a compiled xpath expression to match nodes.
	// count() evaluates to a number, hence the float64 assertion.
	expr := xpath.MustCompile("count(//div[@class='article'])")
	fmt.Printf("%f \n", expr.Evaluate(htmlquery.CreateXPathNavigator(doc)).(float64))

	// A node-set expression evaluates to an iterator over the matches.
	expr = xpath.MustCompile("//a/@href")
	iter := expr.Evaluate(htmlquery.CreateXPathNavigator(doc)).(*xpath.NodeIterator)
	for iter.MoveNext() {
		fmt.Printf("%s \n", iter.Current().Value()) // output href
	}

	// Option 2: using the built-in function Find() to match nodes.
	for _, n := range htmlquery.Find(doc, "//a/@href") {
		fmt.Printf("%s \n", htmlquery.SelectAttr(n, "href")) // output href
	}
}

XML Query GoDoc

Extract data from an XML document.

package main

import (
	"fmt"
	"os"

	"github.com/antchfx/xpath"
	xmlquery "github.com/antchfx/xquery/xml"
)

// main parses ./examples/test.xml and demonstrates querying it with a
// compiled XPath expression as well as with the xmlquery.Find and
// xmlquery.FindOne helpers. It panics if the file cannot be opened or parsed,
// or if the XPath expression fails to compile.
func main() {
	// Load XML document from file.
	f, err := os.Open(`./examples/test.xml`)
	if err != nil {
		panic(err)
	}
	// Release the file handle once the example finishes.
	defer f.Close()

	// Parse XML document.
	doc, err := xmlquery.Parse(f)
	if err != nil {
		panic(err)
	}

	// Option 1: using a compiled xpath expression to evaluate a value.

	// sum all book's price via Evaluate()
	expr, err := xpath.Compile("sum(//book/price)")
	if err != nil {
		panic(err)
	}
	fmt.Printf("total price: %f\n", expr.Evaluate(xmlquery.CreateXPathNavigator(doc)).(float64))

	// Option 2: using the built-in functions Find() and FindOne() to match nodes.
	for _, n := range xmlquery.Find(doc, "//book") {
		title := xmlquery.FindOne(n, "title")
		if title == nil {
			// Skip books without a <title> child instead of dereferencing nil.
			continue
		}
		fmt.Printf("%s : %s \n", n.SelectAttr("id"), title.InnerText())
	}

	// Guard against a missing match before printing the node.
	if n := xmlquery.FindOne(doc, "//book[@id='bk104']"); n != nil {
		fmt.Printf("%s \n", n.OutputXML(true))
	}
}