Some users have reported instances of the host hanging with secondary
threads of a core waiting for the primary thread to exit the guest,
and the primary thread stuck in nap mode. This prompted a review of
the memory barriers in the guest entry/exit code, and this is the
result. Most of these changes are the suggestions of Dean Burdick
<deanburdick@us.ibm.com>.
The barriers between updating napping_threads and reading the
entry_exit_count on the one hand, and updating entry_exit_count and
reading napping_threads on the other, need to be isync not lwsync,
since we need to ensure that either the napping_threads update or the
entry_exit_count update gets seen. It is not sufficient to order the
load vs. lwarx, as lwsync does; we need to order the load vs. the
stwcx., so we need isync.
In addition, we need a full sync before sending IPIs to wake other
threads from nap, to ensure that the write to the entry_exit_count is
visible before the IPI occurs.
Signed-off-by: Paul Mackerras <paulus@samba.org>
Signed-off-by: Alexander Graf <agraf@suse.de>
13: b machine_check_fwnmi
13: b machine_check_fwnmi
/*
* We come in here when wakened from nap mode on a secondary hw thread.
* Relocation is off and most register values are lost.
/*
* We come in here when wakened from nap mode on a secondary hw thread.
* Relocation is off and most register values are lost.
/* Clear our vcpu pointer so we don't come back in early */
li r0, 0
std r0, HSTATE_KVM_VCPU(r13)
/* Clear our vcpu pointer so we don't come back in early */
li r0, 0
std r0, HSTATE_KVM_VCPU(r13)
+ /*
+ * Make sure we clear HSTATE_KVM_VCPU(r13) before incrementing
+ * the nap_count, because once the increment to nap_count is
+ * visible we could be given another vcpu.
+ */
lwsync
/* Clear any pending IPI - we're an offline thread */
ld r5, HSTATE_XICS_PHYS(r13)
lwsync
/* Clear any pending IPI - we're an offline thread */
ld r5, HSTATE_XICS_PHYS(r13)
/* increment the nap count and then go to nap mode */
ld r4, HSTATE_KVM_VCORE(r13)
addi r4, r4, VCORE_NAP_COUNT
/* increment the nap count and then go to nap mode */
ld r4, HSTATE_KVM_VCORE(r13)
addi r4, r4, VCORE_NAP_COUNT
- lwsync /* make previous updates visible */
51: lwarx r3, 0, r4
addi r3, r3, 1
stwcx. r3, 0, r4
51: lwarx r3, 0, r4
addi r3, r3, 1
stwcx. r3, 0, r4
*/
/* Increment the threads-exiting-guest count in the 0xff00
bits of vcore->entry_exit_count */
*/
/* Increment the threads-exiting-guest count in the 0xff00
bits of vcore->entry_exit_count */
ld r5,HSTATE_KVM_VCORE(r13)
addi r6,r5,VCORE_ENTRY_EXIT
41: lwarx r3,0,r6
addi r0,r3,0x100
stwcx. r0,0,r6
bne 41b
ld r5,HSTATE_KVM_VCORE(r13)
addi r6,r5,VCORE_ENTRY_EXIT
41: lwarx r3,0,r6
addi r0,r3,0x100
stwcx. r0,0,r6
bne 41b
+ isync /* order stwcx. vs. reading napping_threads */
/*
* At this point we have an interrupt that we have to pass
/*
* At this point we have an interrupt that we have to pass
sld r0,r0,r4
andc. r3,r3,r0 /* no sense IPI'ing ourselves */
beq 43f
sld r0,r0,r4
andc. r3,r3,r0 /* no sense IPI'ing ourselves */
beq 43f
+ /* Order entry/exit update vs. IPIs */
+ sync
mulli r4,r4,PACA_SIZE /* get paca for thread 0 */
subf r6,r4,r13
42: andi. r0,r3,1
mulli r4,r4,PACA_SIZE /* get paca for thread 0 */
subf r6,r4,r13
42: andi. r0,r3,1
bge kvm_cede_exit
stwcx. r4,0,r6
bne 31b
bge kvm_cede_exit
stwcx. r4,0,r6
bne 31b
+ /* order napping_threads update vs testing entry_exit_count */
+ isync
li r0,1
stb r0,HSTATE_NAPPING(r13)
li r0,1
stb r0,HSTATE_NAPPING(r13)
- /* order napping_threads update vs testing entry_exit_count */
- lwsync
mr r4,r3
lwz r7,VCORE_ENTRY_EXIT(r5)
cmpwi r7,0x100
mr r4,r3
lwz r7,VCORE_ENTRY_EXIT(r5)
cmpwi r7,0x100