序
BerkeleyDB在爬虫、搜索领域应用较多,其主要特点是:嵌入式的键值(KV)数据库,功能强大,可支持数百TB的存储。本文主要讲述如何在Java中使用它。
添加依赖
<!-- Berkeley DB Java Edition -->
<dependency>
<groupId>com.sleepycat</groupId>
<artifactId>je</artifactId>
<!-- Alternative version, left commented out; the 6.4.9 line below is the one in effect. -->
<!--<version>5.0.73</version>-->
<version>6.4.9</version>
</dependency>
如果使用的是5以上的版本,则需要在pom中额外添加Oracle的Maven仓库:
<!-- Extra Maven repository declaration pointing at Oracle's download site. -->
<!-- NOTE(review): the URL uses plain http; check whether an https endpoint is available before relying on it. -->
<repositories>
<repository>
<id>oracleReleases</id>
<name>Oracle Released Java Packages</name>
<url>http://download.oracle.com/maven</url>
<layout>default</layout>
</repository>
</repositories>
使用方式
在Java中主要有两种使用方式:一种是基于注解的方式,另一种是直接使用原始的底层API。本文主要采用基于注解的方式。
领域模型
/**
 * Entity class for a person, identified by its {@code ssn} primary key.
 *
 * <p>Three secondary keys are declared: {@code parentSsn} (many persons to
 * one parent {@code Person}), {@code emailAddresses} (one person to many
 * addresses) and {@code employerIds} (many-to-many with {@link Employer};
 * on deletion of the related employer the key is nullified).
 */
@Entity
public class Person {

    /** Primary key of the entity. */
    @PrimaryKey
    String ssn;

    String name;

    /** Embedded persistent object; not indexed. */
    Address address;

    /** Primary key of the parent person, or {@code null} when there is none. */
    @SecondaryKey(relate = MANY_TO_ONE, relatedEntity = Person.class)
    String parentSsn;

    @SecondaryKey(relate = ONE_TO_MANY)
    Set<String> emailAddresses = new HashSet<String>();

    /** Primary keys of the related {@link Employer} entities. */
    @SecondaryKey(relate = MANY_TO_MANY,
            relatedEntity = Employer.class,
            onRelatedEntityDelete = NULLIFY)
    Set<Long> employerIds = new HashSet<Long>();

    /** No-argument constructor, needed for deserialization. */
    private Person() {
    }

    /**
     * Creates a person; the address, e-mail and employer collections start
     * out unset/empty.
     *
     * @param name      display name
     * @param ssn       primary key
     * @param parentSsn primary key of the parent, may be {@code null}
     */
    public Person(String name, String ssn, String parentSsn) {
        this.ssn = ssn;
        this.name = name;
        this.parentSsn = parentSsn;
    }

    /* ---- plain accessors ---- */

    public String getSsn() {
        return this.ssn;
    }

    public void setSsn(String ssn) {
        this.ssn = ssn;
    }

    public String getName() {
        return this.name;
    }

    public void setName(String name) {
        this.name = name;
    }

    public Address getAddress() {
        return this.address;
    }

    public void setAddress(Address address) {
        this.address = address;
    }

    public String getParentSsn() {
        return this.parentSsn;
    }

    public void setParentSsn(String parentSsn) {
        this.parentSsn = parentSsn;
    }

    /** Returns the live (mutable) set of e-mail addresses. */
    public Set<String> getEmailAddresses() {
        return this.emailAddresses;
    }

    public void setEmailAddresses(Set<String> emailAddresses) {
        this.emailAddresses = emailAddresses;
    }

    /** Returns the live (mutable) set of employer ids. */
    public Set<Long> getEmployerIds() {
        return this.employerIds;
    }

    public void setEmployerIds(Set<Long> employerIds) {
        this.employerIds = employerIds;
    }
}
内嵌对象
/**
 * Persistent (non-entity) value class that is embedded in other classes.
 * It has no key of its own; it simply carries four address fields.
 */
@Persistent
public class Address {

    String street;
    String city;
    String state;
    int zipCode;

    /** No-argument constructor, needed for deserialization. */
    public Address() {
    }

    public String getStreet() {
        return this.street;
    }

    public void setStreet(String street) {
        this.street = street;
    }

    public String getCity() {
        return this.city;
    }

    public void setCity(String city) {
        this.city = city;
    }

    public String getState() {
        return this.state;
    }

    public void setState(String state) {
        this.state = state;
    }

    public int getZipCode() {
        return this.zipCode;
    }

    public void setZipCode(int zipCode) {
        this.zipCode = zipCode;
    }
}
关联
/**
 * Entity class for an employer. The primary key is drawn from the sequence
 * named "ID"; {@code name} is a one-to-one secondary key.
 */
@Entity
public class Employer {

    /** Primary key, assigned from the "ID" sequence. */
    @PrimaryKey(sequence = "ID")
    private long id;

    /** Secondary key declared as one-to-one with the entity. */
    @SecondaryKey(relate = ONE_TO_ONE)
    private String name;

    /** Embedded persistent object; not indexed. */
    private Address address;

    /** No-argument constructor, needed for deserialization. */
    private Employer() {
    }

    /**
     * Creates an employer with the given name; the id is left at its default.
     *
     * @param name employer name
     */
    public Employer(String name) {
        this.name = name;
    }

    public long getId() {
        return this.id;
    }

    public void setId(long id) {
        this.id = id;
    }

    public String getName() {
        return this.name;
    }

    public void setName(String name) {
        this.name = name;
    }

    public Address getAddress() {
        return this.address;
    }

    public void setAddress(Address address) {
        this.address = address;
    }
}
初始化及关闭操作
/* ---- Where the environment lives and how it is opened ---- */

/** Environment home: the "bdb" directory under the current working directory. */
private File envHome = new File(System.getProperty("user.dir") + File.separator + "bdb");
/** When true, the environment and store are opened read-only and never created. */
private boolean readOnly = false;

/* ---- Handles opened in prepare() and released in close() ---- */

private Environment myEnv;
private EntityStore store;

/* ---- Inventory / Vendor accessors ---- */

private PrimaryIndex<String, Inventory> inventoryBySku;
private SecondaryIndex<String, String, Inventory> inventoryByName;
private PrimaryIndex<String, Vendor> vendorByName;

/* ---- Employer accessors ---- */

PrimaryIndex<Long, Employer> employerById;
SecondaryIndex<String, Long, Employer> employerByName;

/* ---- Person accessors ---- */

PrimaryIndex<String, Person> personBySsn;
SecondaryIndex<String, String, Person> personByParentSsn;
SecondaryIndex<String, String, Person> personByEmailAddresses;
SecondaryIndex<Long, String, Person> personByEmployerIds;
/**
 * Opens the environment and the entity store, then resolves every
 * primary/secondary index used by the tests.
 *
 * @throws IllegalStateException if the environment home directory does not
 *         exist and cannot be created
 */
@Before
public void prepare() {
    EnvironmentConfig myEnvConfig = new EnvironmentConfig();
    StoreConfig storeConfig = new StoreConfig();
    myEnvConfig.setReadOnly(readOnly);
    storeConfig.setReadOnly(readOnly);
    // If the environment is opened for write, then we want to be
    // able to create the environment and entity store if
    // they do not exist.
    myEnvConfig.setAllowCreate(!readOnly);
    storeConfig.setAllowCreate(!readOnly);

    // Make sure the home directory exists. mkdirs() also creates missing
    // parents, and its result is checked so that a failure surfaces here
    // with a clear message instead of later inside the Environment
    // constructor. The trailing isDirectory() check tolerates another
    // process creating the directory concurrently.
    System.out.println(envHome.getAbsolutePath());
    if (!envHome.exists() && !envHome.mkdirs() && !envHome.isDirectory()) {
        throw new IllegalStateException(
                "Cannot create environment home directory: " + envHome.getAbsolutePath());
    }

    // Open the environment and entity store
    myEnv = new Environment(envHome, myEnvConfig);
    store = new EntityStore(myEnv, "EntityStore", storeConfig);

    // Primary key for Inventory classes
    inventoryBySku = store.getPrimaryIndex(String.class, Inventory.class);
    // Secondary key for Inventory classes
    // Last field in the getSecondaryIndex() method must be
    // the name of a class member; in this case, an Inventory.class
    // data member.
    inventoryByName = store.getSecondaryIndex(inventoryBySku, String.class, "itemName");
    // Primary key for Vendor class
    vendorByName = store.getPrimaryIndex(String.class, Vendor.class);

    // Employer: primary index by id, secondary index on the "name" field
    employerById = store.getPrimaryIndex(Long.class, Employer.class);
    employerByName = store.getSecondaryIndex(employerById, String.class, "name");

    // Person: primary index by ssn plus the three secondary keys declared on the class
    personBySsn = store.getPrimaryIndex(String.class, Person.class);
    personByParentSsn = store.getSecondaryIndex(personBySsn, String.class, "parentSsn");
    personByEmailAddresses = store.getSecondaryIndex(personBySsn, String.class, "emailAddresses");
    personByEmployerIds = store.getSecondaryIndex(personBySsn, Long.class, "employerIds");
}
/**
 * Releases the entity store and then the environment. Each close is
 * attempted independently: a DatabaseException from one is printed and
 * does not prevent the other from being closed.
 */
@After
public void close() {
    // The store goes first.
    try {
        if (store != null) {
            store.close();
        }
    } catch (DatabaseException storeFailure) {
        storeFailure.printStackTrace();
    }

    // Finally, the environment.
    try {
        if (myEnv != null) {
            myEnv.close();
        }
    } catch (DatabaseException envFailure) {
        envFailure.printStackTrace();
    }
}
增删改查
添加数据
/**
 * Loads vendors.txt and inventory.txt from the classpath and stores one
 * Vendor / Inventory entity per line. Fields within a line are separated
 * by '#'.
 *
 * @throws IOException if either resource cannot be read
 */
@Test
public void putData() throws IOException {
    List<String> vendorLines = Resources.readLines(
            this.getClass().getClassLoader().getResource("vendors.txt"), Charsets.UTF_8);
    for (String line : vendorLines) {
        String[] sArray = line.split("#");
        Vendor theVendor = new Vendor();
        theVendor.setVendorName(sArray[0]);
        theVendor.setAddress(sArray[1]);
        theVendor.setCity(sArray[2]);
        theVendor.setState(sArray[3]);
        theVendor.setZipcode(sArray[4]);
        theVendor.setBusinessPhoneNumber(sArray[5]);
        theVendor.setRepName(sArray[6]);
        theVendor.setRepPhoneNumber(sArray[7]);
        // Put it in the store; no explicit transaction is passed.
        // NOTE(review): prepare() as shown never enables transactions on the
        // environment or store, so this looks like a plain non-transactional
        // write rather than an auto-committed one - confirm.
        vendorByName.put(theVendor);
    }

    List<String> inventoryLines = Resources.readLines(
            this.getClass().getClassLoader().getResource("inventory.txt"), Charsets.UTF_8);
    for (String line : inventoryLines) {
        String[] sArray = line.split("#");
        Inventory theInventory = new Inventory();
        theInventory.setItemName(sArray[0]);
        theInventory.setSku(sArray[1]);
        // parseFloat/parseInt give the same result (and the same
        // NumberFormatException on bad input) as the deprecated boxing
        // constructors, without allocating a wrapper object.
        theInventory.setVendorPrice(Float.parseFloat(sArray[2]));
        theInventory.setVendorInventory(Integer.parseInt(sArray[3]));
        theInventory.setCategory(sArray[4]);
        theInventory.setVendor(sArray[5]);
        // Put it in the store through the primary index resolved in
        // prepare(). Note that this causes our secondary key
        // to be automatically updated for us.
        inventoryBySku.put(theInventory);
    }
}
查询数据
/**
 * Prints every Inventory entity whose item-name secondary key equals
 * "Oranges".
 */
@Test
public void getInventoryData() {
    // Narrow the secondary index down to a single key, then walk the matches.
    EntityIndex<String, Inventory> oranges = inventoryByName.subIndex("Oranges");
    EntityCursor<Inventory> matches = oranges.entities();
    try {
        for (Inventory match = matches.next(); match != null; match = matches.next()) {
            System.out.println(ToStringBuilder.reflectionToString(match));
        }
    } finally {
        // The cursor must be closed whether or not the walk completed.
        matches.close();
    }
}
/**
 * Prints every Inventory entity in the store by walking the primary index.
 */
@Test
public void getAllInventory() {
    EntityCursor<Inventory> everything = inventoryBySku.entities();
    try {
        Inventory current;
        while ((current = everything.next()) != null) {
            System.out.println(ToStringBuilder.reflectionToString(current));
        }
    } finally {
        // The cursor must be closed whether or not the walk completed.
        everything.close();
    }
}
更新
如果没有开启允许重复记录,那么对已存在的主键再次执行put即为更新:
/**
 * Stores an Inventory record, reads it back, changes its vendor and stores
 * it again under the same primary key, printing the record before and
 * after the second put.
 */
@Test
public void update() {
    final String sku = "apple-for-update";

    // Initial version of the record.
    Inventory initial = new Inventory();
    initial.setItemName("Apples");
    initial.setSku(sku);
    initial.setVendorPrice(1.20f);
    initial.setVendorInventory(728);
    initial.setCategory("fruits");
    initial.setVendor("Off the Vine");
    inventoryBySku.put(initial);

    // Read it back and show what was stored.
    Inventory stored = inventoryBySku.get(sku);
    System.out.println(ToStringBuilder.reflectionToString(stored));

    // Putting again under the same primary key replaces the record.
    stored.setVendor("vendor update");
    inventoryBySku.put(stored);

    Inventory afterUpdate = inventoryBySku.get(sku);
    System.out.println(ToStringBuilder.reflectionToString(afterUpdate));
}
删除
/**
 * Stores an Inventory record, deletes it by primary key and verifies that
 * the delete reported success and the record can no longer be read.
 */
@Test
public void delete() {
    final String sku = "apple-for-update";

    // Make sure there is something to delete.
    Inventory victim = new Inventory();
    victim.setItemName("Apples");
    victim.setSku(sku);
    victim.setVendorPrice(1.20f);
    victim.setVendorInventory(728);
    victim.setCategory("fruits");
    victim.setVendor("Off the Vine");
    inventoryBySku.put(victim);

    Inventory stored = inventoryBySku.get(sku);
    System.out.println(ToStringBuilder.reflectionToString(stored));

    // delete() reports whether a record with that key existed.
    Assert.assertTrue(inventoryBySku.delete(sku));
    Assert.assertNull(inventoryBySku.get(sku));
}
统计
/**
 * Prints the number of Employer entities in the store.
 *
 * <p>The count is taken from the primary index itself. Counting through a
 * cursor is not suitable here: a cursor's count() reports the number of
 * values for the key at the current cursor position, which on a
 * unique-key (primary) index is at most one regardless of how many
 * entities are stored.
 */
@Test
public void count() {
    // Index-level count of all entities; yields 0 for an empty store.
    long count = employerById.count();
    System.out.println("employee count:" + count);
}
级联及主键自增情况
/**
 * Walks through the relationship features of the model: parent/child
 * lookup via a sub-index, employers whose primary keys come from a
 * sequence, multi-valued secondary keys, and removal of a deleted
 * employer's id from the referencing person.
 */
@Test
public void sequencePk() {
    final String bobSsn = "111-11-1111";

    /*
     * One parent and two children, all written through the Person primary
     * index. A non-null parentSsn places the child in the sub-index of
     * children for that parent key.
     */
    personBySsn.put(new Person("Bob Smith", bobSsn, null));
    personBySsn.put(new Person("Mary Smith", "333-33-3333", bobSsn));
    personBySsn.put(new Person("Jack Smith", "222-22-2222", bobSsn));

    /* List the parent's children with a sub-index cursor. */
    EntityCursor<Person> kids = personByParentSsn.subIndex(bobSsn).entities();
    try {
        for (Person kid = kids.next(); kid != null; kid = kids.next()) {
            System.out.println(kid.getSsn() + " " + kid.getName());
        }
    } finally {
        kids.close();
    }

    /* Fetch Bob by primary key. */
    Person bob = personBySsn.get(bobSsn);
    Assert.assertNotNull(bob);

    /*
     * Two employers, created only if they are not stored yet. Their
     * primary keys are assigned from a sequence.
     */
    Employer gizmoInc = findOrCreateEmployer("Gizmo Inc");
    Employer gadgetInc = findOrCreateEmployer("Gadget Inc");
    long gizmoId = gizmoInc.getId();
    long gadgetId = gadgetInc.getId();

    /* Bob has two jobs and two email addresses. */
    Set<Long> jobs = bob.getEmployerIds();
    jobs.add(gizmoId);
    jobs.add(gadgetId);
    Set<String> mailboxes = bob.getEmailAddresses();
    mailboxes.add("bob@bob.com");
    mailboxes.add("bob@gmail.com");

    /* Write the modified record back. */
    personBySsn.put(bob);

    /* Either email address now resolves to Bob. */
    bob = personByEmailAddresses.get("bob@bob.com");
    Assert.assertNotNull(bob);
    bob = personByEmailAddresses.get("bob@gmail.com");
    Assert.assertNotNull(bob);

    /* Bob also shows up in the employee sub-index of both employers. */
    EntityIndex<String, Person> staff = personByEmployerIds.subIndex(gizmoId);
    Assert.assertTrue(staff.contains(bobSsn));
    staff = personByEmployerIds.subIndex(gadgetId);
    Assert.assertTrue(staff.contains(bobSsn));

    /*
     * Deleting an employer: onRelatedEntityDelete=NULLIFY on the
     * employerIds key removes the deleted id from Bob's employerIds.
     */
    employerById.delete(gizmoId);
    bob = personBySsn.get(bobSsn);
    Assert.assertNotNull(bob);
    Assert.assertFalse(bob.getEmployerIds().contains(gizmoId));
}

/* Looks an employer up by its name key and stores a new one when none exists. */
private Employer findOrCreateEmployer(String employerName) {
    Employer found = employerByName.get(employerName);
    if (found != null) {
        return found;
    }
    Employer created = new Employer(employerName);
    employerById.put(created);
    return created;
}
/**
 * Prints every Employer using a cursor opened without a transaction and
 * with a read-uncommitted cursor configuration.
 */
@Test
public void cursor() {
    CursorConfig uncommittedRead = new CursorConfig();
    // This is ignored if the store is not opened with uncommitted read
    // support.
    uncommittedRead.setReadUncommitted(true);

    EntityCursor<Employer> all = employerById.entities(null, uncommittedRead);
    try {
        Employer current;
        while ((current = all.next()) != null) {
            System.out.println(ToStringBuilder.reflectionToString(current));
        }
    } finally {
        // The cursor must be closed whether or not the walk completed.
        all.close();
    }
}
本工程github