001/* 002 * Licensed to the Apache Software Foundation (ASF) under one or more 003 * contributor license agreements. See the NOTICE file distributed with 004 * this work for additional information regarding copyright ownership. 005 * The ASF licenses this file to You under the Apache License, Version 2.0 006 * (the "License"); you may not use this file except in compliance with 007 * the License. You may obtain a copy of the License at 008 * 009 * http://www.apache.org/licenses/LICENSE-2.0 010 * 011 * Unless required by applicable law or agreed to in writing, software 012 * distributed under the License is distributed on an "AS IS" BASIS, 013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 014 * See the License for the specific language governing permissions and 015 * limitations under the License. 016 */ 017 018package org.apache.commons.net.nntp; 019 020import java.util.Arrays; 021import java.util.HashMap; 022import java.util.List; 023import java.util.Map; 024 025/** 026 * This is an implementation of a message threading algorithm, as originally devised by Zamie Zawinski. 027 * See <a href="http://www.jwz.org/doc/threading.html">http://www.jwz.org/doc/threading.html</a> for details. 028 * For his Java implementation, see 029 * <a href="https://lxr.mozilla.org/mozilla/source/grendel/sources/grendel/view/Threader.java"> 030 * https://lxr.mozilla.org/mozilla/source/grendel/sources/grendel/view/Threader.java</a> 031 */ 032public class Threader { 033 034 /** 035 * 036 * @param threadable 037 * @param idTable 038 */ 039 private void buildContainer(final Threadable threadable, final HashMap<String, NntpThreadContainer> idTable) { 040 String id = threadable.messageThreadId(); 041 NntpThreadContainer container = idTable.get(id); 042 int bogusIdCount = 0; 043 044 // A NntpThreadContainer exists for this id already. This should be a forward reference, but may 045 // be a duplicate id, in which case we will need to generate a bogus placeholder id 046 if (container != null) { 047 if (container.threadable != null) { // oops! duplicate ids... 048 bogusIdCount++; // Avoid dead local store warning 049 id = "<Bogus-id:" + bogusIdCount + ">"; 050 container = null; 051 } else { 052 // The container just contained a forward reference to this message, so let's 053 // fill in the threadable field of the container with this message 054 container.threadable = threadable; 055 } 056 } 057 058 // No container exists for that message Id. Create one and insert it into the hash table. 059 if (container == null) { 060 container = new NntpThreadContainer(); 061 container.threadable = threadable; 062 idTable.put(id, container); 063 } 064 065 // Iterate through all the references and create ThreadContainers for any references that 066 // don't have them. 067 NntpThreadContainer parentRef = null; 068 { 069 final String[] references = threadable.messageThreadReferences(); 070 for (final String refString : references) { 071 NntpThreadContainer ref = idTable.get(refString); 072 073 // if this id doesn't have a container, create one 074 if (ref == null) { 075 ref = new NntpThreadContainer(); 076 idTable.put(refString, ref); 077 } 078 079 // Link references together in the order they appear in the References: header, 080 // IF they don't have a parent already && 081 // IF it will not cause a circular reference 082 if (parentRef != null && ref.parent == null && parentRef != ref && !ref.findChild(parentRef)) { 083 // Link ref into the parent's child list 084 ref.parent = parentRef; 085 ref.next = parentRef.child; 086 parentRef.child = ref; 087 } 088 parentRef = ref; 089 } 090 } 091 092 // parentRef is now set to the container of the last element in the references field. make that 093 // be the parent of this container, unless doing so causes a circular reference 094 if (parentRef != null && (parentRef == container || container.findChild(parentRef))) { 095 parentRef = null; 096 } 097 098 // if it has a parent already, it's because we saw this message in a References: field, and presumed 099 // a parent based on the other entries in that field. Now that we have the actual message, we can 100 // throw away the old parent and use this new one 101 if (container.parent != null) { 102 NntpThreadContainer rest, prev; 103 104 for (prev = null, rest = container.parent.child; rest != null; prev = rest, rest = rest.next) { 105 if (rest == container) { 106 break; 107 } 108 } 109 110 if (rest == null) { 111 throw new IllegalStateException("Didnt find " + container + " in parent " + container.parent); 112 } 113 114 // Unlink this container from the parent's child list 115 if (prev == null) { 116 container.parent.child = container.next; 117 } else { 118 prev.next = container.next; 119 } 120 121 container.next = null; 122 container.parent = null; 123 } 124 125 // If we have a parent, link container into the parents child list 126 if (parentRef != null) { 127 container.parent = parentRef; 128 container.next = parentRef.child; 129 parentRef.child = container; 130 } 131 } 132 133 /** 134 * Find the root set of all existing ThreadContainers 135 * 136 * @param idTable 137 * @return root the NntpThreadContainer representing the root node 138 */ 139 private NntpThreadContainer findRootSet(final HashMap<String, NntpThreadContainer> idTable) { 140 final NntpThreadContainer root = new NntpThreadContainer(); 141 for (final Map.Entry<String, NntpThreadContainer> entry : idTable.entrySet()) { 142 final NntpThreadContainer c = entry.getValue(); 143 if (c.parent == null) { 144 if (c.next != null) { 145 throw new IllegalStateException("c.next is " + c.next.toString()); 146 } 147 c.next = root.child; 148 root.child = c; 149 } 150 } 151 return root; 152 } 153 154 /** 155 * If any two members of the root set have the same subject, merge them. This is to attempt to accomodate messages without References: headers. 156 * 157 * @param root 158 */ 159 private void gatherSubjects(final NntpThreadContainer root) { 160 161 int count = 0; 162 163 for (NntpThreadContainer c = root.child; c != null; c = c.next) { 164 count++; 165 } 166 167 // TODO verify this will avoid rehashing 168 HashMap<String, NntpThreadContainer> subjectTable = new HashMap<>((int) (count * 1.2), (float) 0.9); 169 count = 0; 170 171 for (NntpThreadContainer c = root.child; c != null; c = c.next) { 172 Threadable threadable = c.threadable; 173 174 // No threadable? If so, it is a dummy node in the root set. 175 // Only root set members may be dummies, and they always have at least 2 kids 176 // Take the first kid as representative of the subject 177 if (threadable == null) { 178 threadable = c.child.threadable; 179 } 180 181 final String subj = threadable.simplifiedSubject(); 182 183 if (subj == null || subj.isEmpty()) { 184 continue; 185 } 186 187 final NntpThreadContainer old = subjectTable.get(subj); 188 189 // Add this container to the table iff: 190 // - There exists no container with this subject 191 // - or this is a dummy container and the old one is not - the dummy one is 192 // more interesting as a root, so put it in the table instead 193 // - The container in the table has a "Re:" version of this subject, and 194 // this container has a non-"Re:" version of this subject. The non-"Re:" version 195 // is the more interesting of the two. 196 if (old == null || c.threadable == null && old.threadable != null 197 || old.threadable != null && old.threadable.subjectIsReply() && c.threadable != null && !c.threadable.subjectIsReply()) { 198 subjectTable.put(subj, c); 199 count++; 200 } 201 } 202 203 // If the table is empty, we're done 204 if (count == 0) { 205 return; 206 } 207 208 // subjectTable is now populated with one entry for each subject which occurs in the 209 // root set. Iterate over the root set, and gather together the difference. 210 NntpThreadContainer prev, c, rest; 211 for (prev = null, c = root.child, rest = c.next; c != null; prev = c, c = rest, rest = rest == null ? null : rest.next) { 212 Threadable threadable = c.threadable; 213 214 // is it a dummy node? 215 if (threadable == null) { 216 threadable = c.child.threadable; 217 } 218 219 final String subj = threadable.simplifiedSubject(); 220 221 // Don't thread together all subjectless messages 222 if (subj == null || subj.isEmpty()) { 223 continue; 224 } 225 226 final NntpThreadContainer old = subjectTable.get(subj); 227 228 if (old == c) { // That's us 229 continue; 230 } 231 232 // We have now found another container in the root set with the same subject 233 // Remove the "second" message from the root set 234 if (prev == null) { 235 root.child = c.next; 236 } else { 237 prev.next = c.next; 238 } 239 c.next = null; 240 241 if (old.threadable == null && c.threadable == null) { 242 // both dummies - merge them 243 NntpThreadContainer tail; 244 for (tail = old.child; tail != null && tail.next != null; tail = tail.next) { 245 // do nothing 246 } 247 248 if (tail != null) { // protect against possible NPE 249 tail.next = c.child; 250 } 251 252 for (tail = c.child; tail != null; tail = tail.next) { 253 tail.parent = old; 254 } 255 256 c.child = null; 257 } else if (old.threadable == null || c.threadable != null && c.threadable.subjectIsReply() && !old.threadable.subjectIsReply()) { 258 // Else if old is empty, or c has "Re:" and old does not ==> make this message a child of old 259 c.parent = old; 260 c.next = old.child; 261 old.child = c; 262 } else { 263 // else make the old and new messages be children of a new dummy container. 264 // We create a new container object for old.msg and empty the old container 265 final NntpThreadContainer newc = new NntpThreadContainer(); 266 newc.threadable = old.threadable; 267 newc.child = old.child; 268 269 for (NntpThreadContainer tail = newc.child; tail != null; tail = tail.next) { 270 tail.parent = newc; 271 } 272 273 old.threadable = null; 274 old.child = null; 275 276 c.parent = old; 277 newc.parent = old; 278 279 // Old is now a dummy- give it 2 kids , c and newc 280 old.child = c; 281 c.next = newc; 282 } 283 // We've done a merge, so keep the same prev 284 c = prev; 285 } 286 287 subjectTable.clear(); 288 subjectTable = null; 289 290 } 291 292 /** 293 * Delete any empty or dummy ThreadContainers 294 * 295 * @param parent 296 */ 297 private void pruneEmptyContainers(final NntpThreadContainer parent) { 298 NntpThreadContainer container, prev, next; 299 for (prev = null, container = parent.child, next = container.next; container != null; prev = container, container = next, next = container == null 300 ? null 301 : container.next) { 302 303 // Is it empty and without any children? If so,delete it 304 if (container.threadable == null && container.child == null) { 305 if (prev == null) { 306 parent.child = container.next; 307 } else { 308 prev.next = container.next; 309 } 310 311 // Set container to prev so that prev keeps its same value the next time through the loop 312 container = prev; 313 } 314 315 // Else if empty, with kids, and (not at root or only one kid) 316 else if (container.threadable == null && (container.parent != null || container.child.next == null)) { 317 // We have an invalid/expired message with kids. Promote the kids to this level. 318 NntpThreadContainer tail; 319 final NntpThreadContainer kids = container.child; 320 321 // Remove this container and replace with 'kids'. 322 if (prev == null) { 323 parent.child = kids; 324 } else { 325 prev.next = kids; 326 } 327 328 // Make each child's parent be this level's parent -> i.e. promote the children. 329 // Make the last child's next point to this container's next 330 // i.e. splice kids into the list in place of container 331 for (tail = kids; tail.next != null; tail = tail.next) { 332 tail.parent = container.parent; 333 } 334 335 tail.parent = container.parent; 336 tail.next = container.next; 337 338 // next currently points to the item after the inserted items in the chain - reset that, so we process the newly 339 // promoted items next time round 340 next = kids; 341 342 // Set container to prev so that prev keeps its same value the next time through the loop 343 container = prev; 344 } else if (container.child != null) { 345 // A real message , with kids 346 // Iterate over the children 347 pruneEmptyContainers(container); 348 } 349 } 350 } 351 352 /** 353 * The client passes in a list of Iterable objects, and the Threader constructs a connected 'graph' of messages 354 * 355 * @param messages iterable of messages to thread, must not be empty 356 * @return null if messages == null or root.child == null or messages list is empty 357 * @since 3.0 358 */ 359 public Threadable thread(final Iterable<? extends Threadable> messages) { 360 if (messages == null) { 361 return null; 362 } 363 364 HashMap<String, NntpThreadContainer> idTable = new HashMap<>(); 365 366 // walk through each Threadable element 367 for (final Threadable t : messages) { 368 if (!t.isDummy()) { 369 buildContainer(t, idTable); 370 } 371 } 372 373 if (idTable.isEmpty()) { 374 return null; 375 } 376 377 final NntpThreadContainer root = findRootSet(idTable); 378 idTable.clear(); 379 idTable = null; 380 381 pruneEmptyContainers(root); 382 383 root.reverseChildren(); 384 gatherSubjects(root); 385 386 if (root.next != null) { 387 throw new IllegalStateException("root node has a next:" + root); 388 } 389 390 for (NntpThreadContainer r = root.child; r != null; r = r.next) { 391 if (r.threadable == null) { 392 r.threadable = r.child.threadable.makeDummy(); 393 } 394 } 395 396 final Threadable result = root.child == null ? null : root.child.threadable; 397 root.flush(); 398 399 return result; 400 } 401 402 /** 403 * The client passes in a list of Threadable objects, and the Threader constructs a connected 'graph' of messages 404 * 405 * @param messages list of messages to thread, must not be empty 406 * @return null if messages == null or root.child == null or messages list is empty 407 * @since 2.2 408 */ 409 public Threadable thread(final List<? extends Threadable> messages) { 410 return thread((Iterable<? extends Threadable>) messages); 411 } 412 413 // DEPRECATED METHODS - for API compatibility only - DO NOT USE 414 415 /** 416 * The client passes in an array of Threadable objects, and the Threader constructs a connected 'graph' of messages 417 * 418 * @param messages array of messages to thread, must not be empty 419 * @return null if messages == null or root.child == null or messages array is empty 420 * @deprecated (2.2) prefer {@link #thread(List)} 421 */ 422 @Deprecated 423 public Threadable thread(final Threadable[] messages) { 424 if (messages == null) { 425 return null; 426 } 427 return thread(Arrays.asList(messages)); 428 } 429 430}