-
Enhancement
-
Resolution: Unresolved
-
P4
-
None
A DESCRIPTION OF THE REQUEST :
The AbstractSet.removeAll() method has two strategies:
1. Iterate through the collection to be removed and remove every element.
2. Iterate through itself, checking whether the element is in the collection and should be removed.
The strategy is chosen based on the size of each collection - the smallest collection is iterated through. If the collection to be removed is a List with an equal or larger size than *this* the method completes in O(n^2) time rather than O(n) time. This is because List implementations usually exhibit O(n) time for their contains() methods, whereas Set implementations usually exhibit O(1) time.
JUSTIFICATION :
O(n) time is more desirable than O(n^2) time
EXPECTED VERSUS ACTUAL BEHAVIOR :
EXPECTED -
Removing any collection from a Set should work in O(n) time with n being the size of the collection to be removed
ACTUAL -
Removing a large List from a Set of equal or smaller size exhibits O(n^2) time
---------- BEGIN SOURCE ----------
package main;
import java.util.ArrayList;
import java.util.Collection;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Set;
public class HashSetWithFastRemoveAll<T> extends HashSet<T> {
private static final long serialVersionUID = 1L;
@Override
public boolean removeAll(Collection<?> c) {
/*
* copied verbatim from AbstractSet implementation apart from
* "|| instanceof List"
*/
boolean modified = false;
if (size() > c.size() || c instanceof List) {
for (Iterator<?> i = c.iterator(); i.hasNext();)
modified |= remove(i.next());
} else {
for (Iterator<?> i = iterator(); i.hasNext();) {
if (c.contains(i.next())) {
i.remove();
modified = true;
}
}
}
return modified;
}
// ===testing code===
public static void main(String[] args) {
Set<String> elements = new HashSet<>();
for (int i = 0; i < 20_000; i++) {
elements.add(randomString());
}
testSet(true, elements, new HashSet<>());
testSet(true, elements, new HashSetWithFastRemoveAll<>());
testSet(false, elements, new HashSet<>());
testSet(false, elements, new HashSetWithFastRemoveAll<>());
}
private static void testSet(boolean isWarmUp, Set<String> elements, Set<String> set) {
Set<String> removed = new HashSet<>();
for (int i = 0; i < 20_000; i++) {
removed.add(randomString());
}
// removing all elements is ~4x faster
// removed = elements;
List<String> largeList = new ArrayList<>(removed);
Set<String> largeSet = new HashSet<>(removed);
set.clear();
set.addAll(elements);
double listMS = timeMS(() -> set.removeAll(largeList));
set.clear();
set.addAll(elements);
double setMS = timeMS(() -> set.removeAll(largeSet));
if (!isWarmUp) {
System.out.println("Testing: " + set.getClass());
System.out.println("Remove all from List: " + listMS);
System.out.println("Remove all from Set: " + setMS);
System.out.println();
}
}
private static double timeMS(Runnable runnable) {
long pre = System.nanoTime();
runnable.run();
long post = System.nanoTime();
return 1E-6 * (post - pre);
}
private static String randomString() {
StringBuilder sb = new StringBuilder();
for (int i = 0; i < 5; i++) {
char ch = (char) (Math.random() * 26 + 'a');
sb.append(ch);
}
return sb.toString();
}
}
---------- END SOURCE ----------
CUSTOMER SUBMITTED WORKAROUND :
In AbstractSet.removeAll() method, a precheck can be added to the size comparison ie.
if (size() > c.size()) {
can be replaced with
if (size() > c.size() || c instanceof List) {
The AbstractSet.removeAll() method has two strategies:
1. Iterate through the collection to be removed and remove every element.
2. Iterate through itself, checking whether the element is in the collection and should be removed.
The strategy is chosen based on the size of each collection - the smallest collection is iterated through. If the collection to be removed is a List with an equal or larger size than *this* the method completes in O(n^2) time rather than O(n) time. This is because List implementations usually exhibit O(n) time for their contains() methods, whereas Set implementations usually exhibit O(1) time.
JUSTIFICATION :
O(n) time is more desirable than O(n^2) time
EXPECTED VERSUS ACTUAL BEHAVIOR :
EXPECTED -
Removing any collection from a Set should work in O(n) time with n being the size of the collection to be removed
ACTUAL -
Removing a large List from a Set of equal or smaller size exhibits O(n^2) time
---------- BEGIN SOURCE ----------
package main;
import java.util.ArrayList;
import java.util.Collection;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Set;
public class HashSetWithFastRemoveAll<T> extends HashSet<T> {
private static final long serialVersionUID = 1L;
@Override
public boolean removeAll(Collection<?> c) {
/*
* copied verbatim from AbstractSet implementation apart from
* "|| instanceof List"
*/
boolean modified = false;
if (size() > c.size() || c instanceof List) {
for (Iterator<?> i = c.iterator(); i.hasNext();)
modified |= remove(i.next());
} else {
for (Iterator<?> i = iterator(); i.hasNext();) {
if (c.contains(i.next())) {
i.remove();
modified = true;
}
}
}
return modified;
}
// ===testing code===
public static void main(String[] args) {
Set<String> elements = new HashSet<>();
for (int i = 0; i < 20_000; i++) {
elements.add(randomString());
}
testSet(true, elements, new HashSet<>());
testSet(true, elements, new HashSetWithFastRemoveAll<>());
testSet(false, elements, new HashSet<>());
testSet(false, elements, new HashSetWithFastRemoveAll<>());
}
private static void testSet(boolean isWarmUp, Set<String> elements, Set<String> set) {
Set<String> removed = new HashSet<>();
for (int i = 0; i < 20_000; i++) {
removed.add(randomString());
}
// removing all elements is ~4x faster
// removed = elements;
List<String> largeList = new ArrayList<>(removed);
Set<String> largeSet = new HashSet<>(removed);
set.clear();
set.addAll(elements);
double listMS = timeMS(() -> set.removeAll(largeList));
set.clear();
set.addAll(elements);
double setMS = timeMS(() -> set.removeAll(largeSet));
if (!isWarmUp) {
System.out.println("Testing: " + set.getClass());
System.out.println("Remove all from List: " + listMS);
System.out.println("Remove all from Set: " + setMS);
System.out.println();
}
}
private static double timeMS(Runnable runnable) {
long pre = System.nanoTime();
runnable.run();
long post = System.nanoTime();
return 1E-6 * (post - pre);
}
private static String randomString() {
StringBuilder sb = new StringBuilder();
for (int i = 0; i < 5; i++) {
char ch = (char) (Math.random() * 26 + 'a');
sb.append(ch);
}
return sb.toString();
}
}
---------- END SOURCE ----------
CUSTOMER SUBMITTED WORKAROUND :
In AbstractSet.removeAll() method, a precheck can be added to the size comparison ie.
if (size() > c.size()) {
can be replaced with
if (size() > c.size() || c instanceof List) {
- relates to
-
JDK-6394757 (coll) AbstractSet.removeAll semantics are surprisingly dependent on relative sizes
- Open