Diffstat (limited to 'arch/arc/include')
24 files changed, 411 insertions, 258 deletions
diff --git a/arch/arc/include/asm/Kbuild b/arch/arc/include/asm/Kbuild index d8dd660898b9..5943f7f9d325 100644 --- a/arch/arc/include/asm/Kbuild +++ b/arch/arc/include/asm/Kbuild @@ -46,3 +46,4 @@ generic-y += ucontext.h generic-y += user.h generic-y += vga.h generic-y += xor.h +generic-y += preempt.h diff --git a/arch/arc/include/asm/cache.h b/arch/arc/include/asm/cache.h index 5802849a6cae..2fd3162ec4df 100644 --- a/arch/arc/include/asm/cache.h +++ b/arch/arc/include/asm/cache.h @@ -17,13 +17,7 @@ #endif #define L1_CACHE_BYTES (1 << L1_CACHE_SHIFT) - -/* For a rare case where customers have differently config I/D */ -#define ARC_ICACHE_LINE_LEN L1_CACHE_BYTES -#define ARC_DCACHE_LINE_LEN L1_CACHE_BYTES - -#define ICACHE_LINE_MASK (~(ARC_ICACHE_LINE_LEN - 1)) -#define DCACHE_LINE_MASK (~(ARC_DCACHE_LINE_LEN - 1)) +#define CACHE_LINE_MASK (~(L1_CACHE_BYTES - 1)) /* * ARC700 doesn't cache any access in top 256M. @@ -57,7 +51,7 @@ extern void arc_cache_init(void); extern char *arc_cache_mumbojumbo(int cpu_id, char *buf, int len); -extern void __init read_decode_cache_bcr(void); +extern void read_decode_cache_bcr(void); #endif /* !__ASSEMBLY__ */ diff --git a/arch/arc/include/asm/delay.h b/arch/arc/include/asm/delay.h index 442ce5d0f709..43de30256981 100644 --- a/arch/arc/include/asm/delay.h +++ b/arch/arc/include/asm/delay.h @@ -53,11 +53,10 @@ static inline void __udelay(unsigned long usecs) { unsigned long loops; - /* (long long) cast ensures 64 bit MPY - real or emulated + /* (u64) cast ensures 64 bit MPY - real or emulated * HZ * 4295 is pre-evaluated by gcc - hence only 2 mpy ops */ - loops = ((long long)(usecs * 4295 * HZ) * - (long long)(loops_per_jiffy)) >> 32; + loops = ((u64) usecs * 4295 * HZ * loops_per_jiffy) >> 32; __delay(loops); } diff --git a/arch/arc/include/asm/entry.h b/arch/arc/include/asm/entry.h index df57611652e5..884081099f80 100644 --- a/arch/arc/include/asm/entry.h +++ b/arch/arc/include/asm/entry.h @@ -365,7 +365,7 @@ * it to memory (non-SMP case) or SCRATCH0 Aux Reg (SMP). * * Before saving the full regfile - this reg is restored back, only - * to be saved again on kernel mode stack, as part of ptregs. + * to be saved again on kernel mode stack, as part of pt_regs. *-------------------------------------------------------------*/ .macro EXCPN_PROLOG_FREEUP_REG reg #ifdef CONFIG_SMP @@ -384,6 +384,28 @@ .endm /*-------------------------------------------------------------- + * Exception Entry prologue + * -Switches stack to K mode (if not already) + * -Saves the register file + * + * After this it is safe to call the "C" handlers + *-------------------------------------------------------------*/ +.macro EXCEPTION_PROLOGUE + + /* Need at least 1 reg to code the early exception prologue */ + EXCPN_PROLOG_FREEUP_REG r9 + + /* U/K mode at time of exception (stack not switched if already K) */ + lr r9, [erstatus] + + /* ARC700 doesn't provide auto-stack switching */ + SWITCH_TO_KERNEL_STK + + /* save the regfile */ + SAVE_ALL_SYS +.endm + +/*-------------------------------------------------------------- * Save all registers used by Exceptions (TLB Miss, Prot-V, Mem err etc) * Requires SP to be already switched to kernel mode Stack * sp points to the next free element on the stack at exit of this macro. 
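The delay.h hunk above folds the two (long long) casts into a single (u64) promotion. The magic constant 4295 is roughly 2^32 / 10^6, so the final >> 32 turns a divide-by-one-million into a shift. A minimal user-space sketch of the same fixed-point math; the HZ and loops_per_jiffy values are purely illustrative, not taken from the patch:

/*
 * Sketch of the fixed-point math used by ARC __udelay() above.
 * 4295 ~= 2^32 / 10^6, so (usecs * 4295 * HZ * loops_per_jiffy) >> 32
 * approximates usecs * loops_per_jiffy * HZ / 10^6 without a division.
 */
#include <stdint.h>
#include <stdio.h>

int main(void)
{
	const uint64_t HZ = 100;                /* assumed tick rate */
	const uint64_t loops_per_jiffy = 50000; /* assumed calibration value */
	uint64_t usecs = 123;

	uint64_t approx = (usecs * 4295 * HZ * loops_per_jiffy) >> 32;
	uint64_t exact  = usecs * loops_per_jiffy * HZ / 1000000;

	/* prints approx=615 exact=615 for the values above */
	printf("approx=%llu exact=%llu\n",
	       (unsigned long long)approx, (unsigned long long)exact);
	return 0;
}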
diff --git a/arch/arc/include/asm/io.h b/arch/arc/include/asm/io.h index 473424d7528b..334ce7017a18 100644 --- a/arch/arc/include/asm/io.h +++ b/arch/arc/include/asm/io.h @@ -100,6 +100,10 @@ static inline void __raw_writel(u32 w, volatile void __iomem *addr) } +#define readb_relaxed readb +#define readw_relaxed readw +#define readl_relaxed readl + #include <asm-generic/io.h> #endif /* _ASM_ARC_IO_H */ diff --git a/arch/arc/include/asm/irq.h b/arch/arc/include/asm/irq.h index c0a72105ee0b..291a70db68b8 100644 --- a/arch/arc/include/asm/irq.h +++ b/arch/arc/include/asm/irq.h @@ -18,8 +18,8 @@ #include <asm-generic/irq.h> -extern void __init arc_init_IRQ(void); -extern int __init get_hw_config_num_irq(void); +extern void arc_init_IRQ(void); +extern int get_hw_config_num_irq(void); void arc_local_timer_setup(unsigned int cpu); diff --git a/arch/arc/include/asm/irqflags.h b/arch/arc/include/asm/irqflags.h index d99f79bcf865..cb7efc29f16f 100644 --- a/arch/arc/include/asm/irqflags.h +++ b/arch/arc/include/asm/irqflags.h @@ -151,23 +151,38 @@ static inline void arch_unmask_irq(unsigned int irq) #else -.macro IRQ_DISABLE scratch - lr \scratch, [status32] - bic \scratch, \scratch, (STATUS_E1_MASK | STATUS_E2_MASK) - flag \scratch +#ifdef CONFIG_TRACE_IRQFLAGS + +.macro TRACE_ASM_IRQ_DISABLE + bl trace_hardirqs_off +.endm + +.macro TRACE_ASM_IRQ_ENABLE + bl trace_hardirqs_on +.endm + +#else + +.macro TRACE_ASM_IRQ_DISABLE +.endm + +.macro TRACE_ASM_IRQ_ENABLE .endm -.macro IRQ_DISABLE_SAVE scratch, save +#endif + +.macro IRQ_DISABLE scratch lr \scratch, [status32] - mov \save, \scratch /* Make a copy */ bic \scratch, \scratch, (STATUS_E1_MASK | STATUS_E2_MASK) flag \scratch + TRACE_ASM_IRQ_DISABLE .endm .macro IRQ_ENABLE scratch lr \scratch, [status32] or \scratch, \scratch, (STATUS_E1_MASK | STATUS_E2_MASK) flag \scratch + TRACE_ASM_IRQ_ENABLE .endm #endif /* __ASSEMBLY__ */ diff --git a/arch/arc/include/asm/mach_desc.h b/arch/arc/include/asm/mach_desc.h index 9998dc846ebb..e8993a2be6c2 100644 --- a/arch/arc/include/asm/mach_desc.h +++ b/arch/arc/include/asm/mach_desc.h @@ -51,22 +51,12 @@ struct machine_desc { /* * Current machine - only accessible during boot. */ -extern struct machine_desc *machine_desc; +extern const struct machine_desc *machine_desc; /* * Machine type table - also only accessible during boot */ -extern struct machine_desc __arch_info_begin[], __arch_info_end[]; -#define for_each_machine_desc(p) \ - for (p = __arch_info_begin; p < __arch_info_end; p++) - -static inline struct machine_desc *default_machine_desc(void) -{ - /* the default machine is the last one linked in */ - if (__arch_info_end - 1 < __arch_info_begin) - return NULL; - return __arch_info_end - 1; -} +extern const struct machine_desc __arch_info_begin[], __arch_info_end[]; /* * Set of macros to define architecture features. 
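The io.h hunk earlier in this group maps the _relaxed MMIO accessors straight onto the plain readb/readw/readl, since ARC has no weaker-ordered variant. A hedged usage sketch follows; the register offset, ready bit and timeout are hypothetical and only meant to show the kind of status-poll loop where a driver reaches for the relaxed form:

#include <linux/delay.h>
#include <linux/errno.h>
#include <linux/io.h>

/* hypothetical device registers - not part of this patch */
#define DEV_STATUS_REG		0x04
#define DEV_STATUS_READY	0x1

static int wait_for_ready(void __iomem *base)
{
	int timeout = 1000;

	/* no ordering against DMA buffers is implied here, so the relaxed
	 * accessor is enough; on ARC it is simply readl() per the define above */
	while (!(readl_relaxed(base + DEV_STATUS_REG) & DEV_STATUS_READY)) {
		if (--timeout == 0)
			return -ETIMEDOUT;
		udelay(10);
	}
	return 0;
}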
@@ -81,7 +71,6 @@ __attribute__((__section__(".arch.info.init"))) = { \ #define MACHINE_END \ }; -extern struct machine_desc *setup_machine_fdt(void *dt); -extern void __init copy_devtree(void); +extern const struct machine_desc *setup_machine_fdt(void *dt); #endif diff --git a/arch/arc/include/asm/mmu.h b/arch/arc/include/asm/mmu.h index 7c03fe61759c..8c84ae98c337 100644 --- a/arch/arc/include/asm/mmu.h +++ b/arch/arc/include/asm/mmu.h @@ -32,6 +32,8 @@ /* Error code if probe fails */ #define TLB_LKUP_ERR 0x80000000 +#define TLB_DUP_ERR (TLB_LKUP_ERR | 0x00000001) + /* TLB Commands */ #define TLBWrite 0x1 #define TLBRead 0x2 @@ -46,21 +48,18 @@ #ifndef __ASSEMBLY__ typedef struct { - unsigned long asid; /* Pvt Addr-Space ID for mm */ -#ifdef CONFIG_ARC_TLB_DBG - struct task_struct *tsk; -#endif + unsigned long asid[NR_CPUS]; /* 8 bit MMU PID + Generation cycle */ } mm_context_t; #ifdef CONFIG_ARC_DBG_TLB_PARANOIA -void tlb_paranoid_check(unsigned int pid_sw, unsigned long address); +void tlb_paranoid_check(unsigned int mm_asid, unsigned long address); #else #define tlb_paranoid_check(a, b) #endif void arc_mmu_init(void); extern char *arc_mmu_mumbojumbo(int cpu_id, char *buf, int len); -void __init read_decode_mmu_bcr(void); +void read_decode_mmu_bcr(void); #endif /* !__ASSEMBLY__ */ diff --git a/arch/arc/include/asm/mmu_context.h b/arch/arc/include/asm/mmu_context.h index 0d71fb11b57c..1fd467ef658f 100644 --- a/arch/arc/include/asm/mmu_context.h +++ b/arch/arc/include/asm/mmu_context.h @@ -30,99 +30,72 @@ * "Fast Context Switch" i.e. no TLB flush on ctxt-switch * * Linux assigns each task a unique ASID. A simple round-robin allocation - * of H/w ASID is done using software tracker @asid_cache. + * of H/w ASID is done using software tracker @asid_cpu. * When it reaches max 255, the allocation cycle starts afresh by flushing * the entire TLB and wrapping ASID back to zero. * - * For book-keeping, Linux uses a couple of data-structures: - * -mm_struct has an @asid field to keep a note of task's ASID (needed at the - * time of say switch_mm( ) - * -An array of mm structs @asid_mm_map[] for asid->mm the reverse mapping, - * given an ASID, finding the mm struct associated. - * - * The round-robin allocation algorithm allows for ASID stealing. - * If asid tracker is at "x-1", a new req will allocate "x", even if "x" was - * already assigned to another (switched-out) task. Obviously the prev owner - * is marked with an invalid ASID to make it request for a new ASID when it - * gets scheduled next time. However its TLB entries (with ASID "x") could - * exist, which must be cleared before the same ASID is used by the new owner. - * Flushing them would be plausible but costly solution. Instead we force a - * allocation policy quirk, which ensures that a stolen ASID won't have any - * TLB entries associates, alleviating the need to flush. - * The quirk essentially is not allowing ASID allocated in prev cycle - * to be used past a roll-over in the next cycle. - * When this happens (i.e. task ASID > asid tracker), task needs to refresh - * its ASID, aligning it to current value of tracker. If the task doesn't get - * scheduled past a roll-over, hence its ASID is not yet realigned with - * tracker, such ASID is anyways safely reusable because it is - * gauranteed that TLB entries with that ASID wont exist. + * A new allocation cycle, post rollover, could potentially reassign an ASID + * to a different task. Thus the rule is to refresh the ASID in a new cycle. 
+ * The 32 bit @asid_cpu (and mm->asid) have 8 bits MMU PID and rest 24 bits + * serve as cycle/generation indicator and natural 32 bit unsigned math + * automagically increments the generation when lower 8 bits rollover. */ -#define FIRST_ASID 0 -#define MAX_ASID 255 /* 8 bit PID field in PID Aux reg */ -#define NO_ASID (MAX_ASID + 1) /* ASID Not alloc to mmu ctxt */ -#define NUM_ASID ((MAX_ASID - FIRST_ASID) + 1) +#define MM_CTXT_ASID_MASK 0x000000ff /* MMU PID reg :8 bit PID */ +#define MM_CTXT_CYCLE_MASK (~MM_CTXT_ASID_MASK) + +#define MM_CTXT_FIRST_CYCLE (MM_CTXT_ASID_MASK + 1) +#define MM_CTXT_NO_ASID 0UL -/* ASID to mm struct mapping */ -extern struct mm_struct *asid_mm_map[NUM_ASID + 1]; +#define asid_mm(mm, cpu) mm->context.asid[cpu] +#define hw_pid(mm, cpu) (asid_mm(mm, cpu) & MM_CTXT_ASID_MASK) -extern int asid_cache; +DECLARE_PER_CPU(unsigned int, asid_cache); +#define asid_cpu(cpu) per_cpu(asid_cache, cpu) /* - * Assign a new ASID to task. If the task already has an ASID, it is - * relinquished. + * Get a new ASID if task doesn't have a valid one (unalloc or from prev cycle) + * Also set the MMU PID register to existing/updated ASID */ static inline void get_new_mmu_context(struct mm_struct *mm) { - struct mm_struct *prev_owner; + const unsigned int cpu = smp_processor_id(); unsigned long flags; local_irq_save(flags); /* - * Relinquish the currently owned ASID (if any). - * Doing unconditionally saves a cmp-n-branch; for already unused - * ASID slot, the value was/remains NULL + * Move to new ASID if it was not from current alloc-cycle/generation. + * This is done by ensuring that the generation bits in both mm->ASID + * and cpu's ASID counter are exactly same. + * + * Note: Callers needing new ASID unconditionally, independent of + * generation, e.g. local_flush_tlb_mm() for forking parent, + * first need to destroy the context, setting it to invalid + * value. */ - asid_mm_map[mm->context.asid] = (struct mm_struct *)NULL; + if (!((asid_mm(mm, cpu) ^ asid_cpu(cpu)) & MM_CTXT_CYCLE_MASK)) + goto set_hw; - /* move to new ASID */ - if (++asid_cache > MAX_ASID) { /* ASID roll-over */ - asid_cache = FIRST_ASID; - flush_tlb_all(); - } + /* move to new ASID and handle rollover */ + if (unlikely(!(++asid_cpu(cpu) & MM_CTXT_ASID_MASK))) { - /* - * Is next ASID already owned by some-one else (we are stealing it). - * If so, let the orig owner be aware of this, so when it runs, it - * asks for a brand new ASID. This would only happen for a long-lived - * task with ASID from prev allocation cycle (before ASID roll-over). - * - * This might look wrong - if we are re-using some other task's ASID, - * won't we use it's stale TLB entries too. Actually switch_mm( ) takes - * care of such a case: it ensures that task with ASID from prev alloc - * cycle, when scheduled will refresh it's ASID: see switch_mm( ) below - * The stealing scenario described here will only happen if that task - * didn't get a chance to refresh it's ASID - implying stale entries - * won't exist. - */ - prev_owner = asid_mm_map[asid_cache]; - if (prev_owner) - prev_owner->context.asid = NO_ASID; + local_flush_tlb_all(); + + /* + * Above checke for rollover of 8 bit ASID in 32 bit container. 
+ * If the container itself wrapped around, set it to a non zero + * "generation" to distinguish from no context + */ + if (!asid_cpu(cpu)) + asid_cpu(cpu) = MM_CTXT_FIRST_CYCLE; + } /* Assign new ASID to tsk */ - asid_mm_map[asid_cache] = mm; - mm->context.asid = asid_cache; - -#ifdef CONFIG_ARC_TLB_DBG - pr_info("ARC_TLB_DBG: NewMM=0x%x OldMM=0x%x task_struct=0x%x Task: %s," - " pid:%u, assigned asid:%lu\n", - (unsigned int)mm, (unsigned int)prev_owner, - (unsigned int)(mm->context.tsk), (mm->context.tsk)->comm, - (mm->context.tsk)->pid, mm->context.asid); -#endif + asid_mm(mm, cpu) = asid_cpu(cpu); - write_aux_reg(ARC_REG_PID, asid_cache | MMU_ENABLE); +set_hw: + write_aux_reg(ARC_REG_PID, hw_pid(mm, cpu) | MMU_ENABLE); local_irq_restore(flags); } @@ -134,59 +107,61 @@ static inline void get_new_mmu_context(struct mm_struct *mm) static inline int init_new_context(struct task_struct *tsk, struct mm_struct *mm) { - mm->context.asid = NO_ASID; -#ifdef CONFIG_ARC_TLB_DBG - mm->context.tsk = tsk; -#endif + int i; + + for_each_possible_cpu(i) + asid_mm(mm, i) = MM_CTXT_NO_ASID; + return 0; } +static inline void destroy_context(struct mm_struct *mm) +{ + unsigned long flags; + + /* Needed to elide CONFIG_DEBUG_PREEMPT warning */ + local_irq_save(flags); + asid_mm(mm, smp_processor_id()) = MM_CTXT_NO_ASID; + local_irq_restore(flags); +} + /* Prepare the MMU for task: setup PID reg with allocated ASID If task doesn't have an ASID (never alloc or stolen, get a new ASID) */ static inline void switch_mm(struct mm_struct *prev, struct mm_struct *next, struct task_struct *tsk) { + const int cpu = smp_processor_id(); + + /* + * Note that the mm_cpumask is "aggregating" only, we don't clear it + * for the switched-out task, unlike some other arches. + * It is used to enlist cpus for sending TLB flush IPIs and not sending + * it to CPUs where a task once ran-on, could cause stale TLB entry + * re-use, specially for a multi-threaded task. + * e.g. T1 runs on C1, migrates to C3. T2 running on C2 munmaps. + * For a non-aggregating mm_cpumask, IPI not sent C1, and if T1 + * were to re-migrate to C1, it could access the unmapped region + * via any existing stale TLB entries. + */ + cpumask_set_cpu(cpu, mm_cpumask(next)); + #ifndef CONFIG_SMP /* PGD cached in MMU reg to avoid 3 mem lookups: task->mm->pgd */ write_aux_reg(ARC_REG_SCRATCH_DATA0, next->pgd); #endif - /* - * Get a new ASID if task doesn't have a valid one. Possible when - * -task never had an ASID (fresh after fork) - * -it's ASID was stolen - past an ASID roll-over. - * -There's a third obscure scenario (if this task is running for the - * first time afer an ASID rollover), where despite having a valid - * ASID, we force a get for new ASID - see comments at top. - * - * Both the non-alloc scenario and first-use-after-rollover can be - * detected using the single condition below: NO_ASID = 256 - * while asid_cache is always a valid ASID value (0-255). 
- */ - if (next->context.asid > asid_cache) { - get_new_mmu_context(next); - } else { - /* - * XXX: This will never happen given the chks above - * BUG_ON(next->context.asid > MAX_ASID); - */ - write_aux_reg(ARC_REG_PID, next->context.asid | MMU_ENABLE); - } - + get_new_mmu_context(next); } -static inline void destroy_context(struct mm_struct *mm) -{ - unsigned long flags; - - local_irq_save(flags); - - asid_mm_map[mm->context.asid] = NULL; - mm->context.asid = NO_ASID; - - local_irq_restore(flags); -} +/* + * Called at the time of execve() to get a new ASID + * Note the subtlety here: get_new_mmu_context() behaves differently here + * vs. in switch_mm(). Here it always returns a new ASID, because mm has + * an unallocated "initial" value, while in latter, it moves to a new ASID, + * only if it was unallocated + */ +#define activate_mm(prev, next) switch_mm(prev, next, NULL) /* it seemed that deactivate_mm( ) is a reasonable place to do book-keeping * for retiring-mm. However destroy_context( ) still needs to do that because @@ -197,17 +172,6 @@ static inline void destroy_context(struct mm_struct *mm) */ #define deactivate_mm(tsk, mm) do { } while (0) -static inline void activate_mm(struct mm_struct *prev, struct mm_struct *next) -{ -#ifndef CONFIG_SMP - write_aux_reg(ARC_REG_SCRATCH_DATA0, next->pgd); -#endif - - /* Unconditionally get a new ASID */ - get_new_mmu_context(next); - -} - #define enter_lazy_tlb(mm, tsk) #endif /* __ASM_ARC_MMU_CONTEXT_H */ diff --git a/arch/arc/include/asm/perf_event.h b/arch/arc/include/asm/perf_event.h index 115ad96480e6..cbf755e32a03 100644 --- a/arch/arc/include/asm/perf_event.h +++ b/arch/arc/include/asm/perf_event.h @@ -1,5 +1,7 @@ /* - * Copyright (C) 2011-2012 Synopsys, Inc. (www.synopsys.com) + * Linux performance counter support for ARC + * + * Copyright (C) 2011-2013 Synopsys, Inc. 
(www.synopsys.com) * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 as @@ -10,4 +12,204 @@ #ifndef __ASM_PERF_EVENT_H #define __ASM_PERF_EVENT_H +/* real maximum varies per CPU, this is the maximum supported by the driver */ +#define ARC_PMU_MAX_HWEVENTS 64 + +#define ARC_REG_CC_BUILD 0xF6 +#define ARC_REG_CC_INDEX 0x240 +#define ARC_REG_CC_NAME0 0x241 +#define ARC_REG_CC_NAME1 0x242 + +#define ARC_REG_PCT_BUILD 0xF5 +#define ARC_REG_PCT_COUNTL 0x250 +#define ARC_REG_PCT_COUNTH 0x251 +#define ARC_REG_PCT_SNAPL 0x252 +#define ARC_REG_PCT_SNAPH 0x253 +#define ARC_REG_PCT_CONFIG 0x254 +#define ARC_REG_PCT_CONTROL 0x255 +#define ARC_REG_PCT_INDEX 0x256 + +#define ARC_REG_PCT_CONTROL_CC (1 << 16) /* clear counts */ +#define ARC_REG_PCT_CONTROL_SN (1 << 17) /* snapshot */ + +struct arc_reg_pct_build { +#ifdef CONFIG_CPU_BIG_ENDIAN + unsigned int m:8, c:8, r:6, s:2, v:8; +#else + unsigned int v:8, s:2, r:6, c:8, m:8; +#endif +}; + +struct arc_reg_cc_build { +#ifdef CONFIG_CPU_BIG_ENDIAN + unsigned int c:16, r:8, v:8; +#else + unsigned int v:8, r:8, c:16; +#endif +}; + +#define PERF_COUNT_ARC_DCLM (PERF_COUNT_HW_MAX + 0) +#define PERF_COUNT_ARC_DCSM (PERF_COUNT_HW_MAX + 1) +#define PERF_COUNT_ARC_ICM (PERF_COUNT_HW_MAX + 2) +#define PERF_COUNT_ARC_BPOK (PERF_COUNT_HW_MAX + 3) +#define PERF_COUNT_ARC_EDTLB (PERF_COUNT_HW_MAX + 4) +#define PERF_COUNT_ARC_EITLB (PERF_COUNT_HW_MAX + 5) +#define PERF_COUNT_ARC_HW_MAX (PERF_COUNT_HW_MAX + 6) + +/* + * The "generalized" performance events seem to really be a copy + * of the available events on x86 processors; the mapping to ARC + * events is not always possible 1-to-1. Fortunately, there doesn't + * seem to be an exact definition for these events, so we can cheat + * a bit where necessary. + * + * In particular, the following PERF events may behave a bit differently + * compared to other architectures: + * + * PERF_COUNT_HW_CPU_CYCLES + * Cycles not in halted state + * + * PERF_COUNT_HW_REF_CPU_CYCLES + * Reference cycles not in halted state, same as PERF_COUNT_HW_CPU_CYCLES + * for now as we don't do Dynamic Voltage/Frequency Scaling (yet) + * + * PERF_COUNT_HW_BUS_CYCLES + * Unclear what this means, Intel uses 0x013c, which according to + * their datasheet means "unhalted reference cycles". It sounds similar + * to PERF_COUNT_HW_REF_CPU_CYCLES, and we use the same counter for it. + * + * PERF_COUNT_HW_STALLED_CYCLES_BACKEND + * PERF_COUNT_HW_STALLED_CYCLES_FRONTEND + * The ARC 700 can either measure stalls per pipeline stage, or all stalls + * combined; for now we assign all stalls to STALLED_CYCLES_BACKEND + * and all pipeline flushes (e.g. caused by mispredicts, etc.) to + * STALLED_CYCLES_FRONTEND. + * + * We could start multiple performance counters and combine everything + * afterwards, but that makes it complicated. + * + * Note that I$ cache misses aren't counted by either of the two! 
+ */ + +static const char * const arc_pmu_ev_hw_map[] = { + [PERF_COUNT_HW_CPU_CYCLES] = "crun", + [PERF_COUNT_HW_REF_CPU_CYCLES] = "crun", + [PERF_COUNT_HW_BUS_CYCLES] = "crun", + [PERF_COUNT_HW_INSTRUCTIONS] = "iall", + [PERF_COUNT_HW_BRANCH_MISSES] = "bpfail", + [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = "ijmp", + [PERF_COUNT_HW_STALLED_CYCLES_FRONTEND] = "bflush", + [PERF_COUNT_HW_STALLED_CYCLES_BACKEND] = "bstall", + [PERF_COUNT_ARC_DCLM] = "dclm", + [PERF_COUNT_ARC_DCSM] = "dcsm", + [PERF_COUNT_ARC_ICM] = "icm", + [PERF_COUNT_ARC_BPOK] = "bpok", + [PERF_COUNT_ARC_EDTLB] = "edtlb", + [PERF_COUNT_ARC_EITLB] = "eitlb", +}; + +#define C(_x) PERF_COUNT_HW_CACHE_##_x +#define CACHE_OP_UNSUPPORTED 0xffff + +static const unsigned arc_pmu_cache_map[C(MAX)][C(OP_MAX)][C(RESULT_MAX)] = { + [C(L1D)] = { + [C(OP_READ)] = { + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = PERF_COUNT_ARC_DCLM, + }, + [C(OP_WRITE)] = { + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = PERF_COUNT_ARC_DCSM, + }, + [C(OP_PREFETCH)] = { + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, + }, + }, + [C(L1I)] = { + [C(OP_READ)] = { + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = PERF_COUNT_ARC_ICM, + }, + [C(OP_WRITE)] = { + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, + }, + [C(OP_PREFETCH)] = { + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, + }, + }, + [C(LL)] = { + [C(OP_READ)] = { + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, + }, + [C(OP_WRITE)] = { + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, + }, + [C(OP_PREFETCH)] = { + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, + }, + }, + [C(DTLB)] = { + [C(OP_READ)] = { + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = PERF_COUNT_ARC_EDTLB, + }, + [C(OP_WRITE)] = { + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, + }, + [C(OP_PREFETCH)] = { + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, + }, + }, + [C(ITLB)] = { + [C(OP_READ)] = { + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = PERF_COUNT_ARC_EITLB, + }, + [C(OP_WRITE)] = { + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, + }, + [C(OP_PREFETCH)] = { + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, + }, + }, + [C(BPU)] = { + [C(OP_READ)] = { + [C(RESULT_ACCESS)] = PERF_COUNT_HW_BRANCH_INSTRUCTIONS, + [C(RESULT_MISS)] = PERF_COUNT_HW_BRANCH_MISSES, + }, + [C(OP_WRITE)] = { + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, + }, + [C(OP_PREFETCH)] = { + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, + }, + }, + [C(NODE)] = { + [C(OP_READ)] = { + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, + }, + [C(OP_WRITE)] = { + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, + }, + [C(OP_PREFETCH)] = { + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, + }, + }, +}; + #endif /* __ASM_PERF_EVENT_H */ diff --git a/arch/arc/include/asm/pgalloc.h b/arch/arc/include/asm/pgalloc.h index 36a9f20c21a3..81208bfd9dcb 100644 --- a/arch/arc/include/asm/pgalloc.h +++ 
b/arch/arc/include/asm/pgalloc.h @@ -105,11 +105,16 @@ static inline pgtable_t pte_alloc_one(struct mm_struct *mm, unsigned long address) { pgtable_t pte_pg; + struct page *page; pte_pg = __get_free_pages(GFP_KERNEL | __GFP_REPEAT, __get_order_pte()); - if (pte_pg) { - memzero((void *)pte_pg, PTRS_PER_PTE * 4); - pgtable_page_ctor(virt_to_page(pte_pg)); + if (!pte_pg) + return 0; + memzero((void *)pte_pg, PTRS_PER_PTE * 4); + page = virt_to_page(pte_pg); + if (!pgtable_page_ctor(page)) { + __free_page(page); + return 0; } return pte_pg; diff --git a/arch/arc/include/asm/pgtable.h b/arch/arc/include/asm/pgtable.h index 4749a0eee1cf..6b0b7f7ef783 100644 --- a/arch/arc/include/asm/pgtable.h +++ b/arch/arc/include/asm/pgtable.h @@ -57,43 +57,31 @@ #define _PAGE_ACCESSED (1<<1) /* Page is accessed (S) */ #define _PAGE_CACHEABLE (1<<2) /* Page is cached (H) */ -#define _PAGE_U_EXECUTE (1<<3) /* Page has user execute perm (H) */ -#define _PAGE_U_WRITE (1<<4) /* Page has user write perm (H) */ -#define _PAGE_U_READ (1<<5) /* Page has user read perm (H) */ -#define _PAGE_K_EXECUTE (1<<6) /* Page has kernel execute perm (H) */ -#define _PAGE_K_WRITE (1<<7) /* Page has kernel write perm (H) */ -#define _PAGE_K_READ (1<<8) /* Page has kernel perm (H) */ -#define _PAGE_GLOBAL (1<<9) /* Page is global (H) */ -#define _PAGE_MODIFIED (1<<10) /* Page modified (dirty) (S) */ -#define _PAGE_FILE (1<<10) /* page cache/ swap (S) */ -#define _PAGE_PRESENT (1<<11) /* TLB entry is valid (H) */ +#define _PAGE_EXECUTE (1<<3) /* Page has user execute perm (H) */ +#define _PAGE_WRITE (1<<4) /* Page has user write perm (H) */ +#define _PAGE_READ (1<<5) /* Page has user read perm (H) */ +#define _PAGE_MODIFIED (1<<6) /* Page modified (dirty) (S) */ +#define _PAGE_FILE (1<<7) /* page cache/ swap (S) */ +#define _PAGE_GLOBAL (1<<8) /* Page is global (H) */ +#define _PAGE_PRESENT (1<<10) /* TLB entry is valid (H) */ -#else +#else /* MMU v3 onwards */ -/* PD1 */ #define _PAGE_CACHEABLE (1<<0) /* Page is cached (H) */ -#define _PAGE_U_EXECUTE (1<<1) /* Page has user execute perm (H) */ -#define _PAGE_U_WRITE (1<<2) /* Page has user write perm (H) */ -#define _PAGE_U_READ (1<<3) /* Page has user read perm (H) */ -#define _PAGE_K_EXECUTE (1<<4) /* Page has kernel execute perm (H) */ -#define _PAGE_K_WRITE (1<<5) /* Page has kernel write perm (H) */ -#define _PAGE_K_READ (1<<6) /* Page has kernel perm (H) */ -#define _PAGE_ACCESSED (1<<7) /* Page is accessed (S) */ - -/* PD0 */ +#define _PAGE_EXECUTE (1<<1) /* Page has user execute perm (H) */ +#define _PAGE_WRITE (1<<2) /* Page has user write perm (H) */ +#define _PAGE_READ (1<<3) /* Page has user read perm (H) */ +#define _PAGE_ACCESSED (1<<4) /* Page is accessed (S) */ +#define _PAGE_MODIFIED (1<<5) /* Page modified (dirty) (S) */ +#define _PAGE_FILE (1<<6) /* page cache/ swap (S) */ #define _PAGE_GLOBAL (1<<8) /* Page is global (H) */ #define _PAGE_PRESENT (1<<9) /* TLB entry is valid (H) */ -#define _PAGE_SHARED_CODE (1<<10) /* Shared Code page with cmn vaddr +#define _PAGE_SHARED_CODE (1<<11) /* Shared Code page with cmn vaddr usable for shared TLB entries (H) */ - -#define _PAGE_MODIFIED (1<<11) /* Page modified (dirty) (S) */ -#define _PAGE_FILE (1<<12) /* page cache/ swap (S) */ - -#define _PAGE_SHARED_CODE_H (1<<31) /* Hardware counterpart of above */ #endif -/* Kernel allowed all permissions for all pages */ -#define _K_PAGE_PERMS (_PAGE_K_EXECUTE | _PAGE_K_WRITE | _PAGE_K_READ | \ +/* vmalloc permissions */ +#define _K_PAGE_PERMS (_PAGE_EXECUTE | _PAGE_WRITE | 
_PAGE_READ | \ _PAGE_GLOBAL | _PAGE_PRESENT) #ifdef CONFIG_ARC_CACHE_PAGES @@ -109,10 +97,6 @@ */ #define ___DEF (_PAGE_PRESENT | _PAGE_DEF_CACHEABLE) -#define _PAGE_READ (_PAGE_U_READ | _PAGE_K_READ) -#define _PAGE_WRITE (_PAGE_U_WRITE | _PAGE_K_WRITE) -#define _PAGE_EXECUTE (_PAGE_U_EXECUTE | _PAGE_K_EXECUTE) - /* Set of bits not changed in pte_modify */ #define _PAGE_CHG_MASK (PAGE_MASK | _PAGE_ACCESSED | _PAGE_MODIFIED) @@ -126,8 +110,8 @@ #define PAGE_SHARED PAGE_U_W_R -/* While kernel runs out of unstrslated space, vmalloc/modules use a chunk of - * kernel vaddr space - visible in all addr spaces, but kernel mode only +/* While kernel runs out of unstranslated space, vmalloc/modules use a chunk of + * user vaddr space - visible in all addr spaces, but kernel mode only * Thus Global, all-kernel-access, no-user-access, cached */ #define PAGE_KERNEL __pgprot(_K_PAGE_PERMS | _PAGE_DEF_CACHEABLE) @@ -136,10 +120,9 @@ #define PAGE_KERNEL_NO_CACHE __pgprot(_K_PAGE_PERMS) /* Masks for actual TLB "PD"s */ -#define PTE_BITS_IN_PD0 (_PAGE_GLOBAL | _PAGE_PRESENT) -#define PTE_BITS_IN_PD1 (PAGE_MASK | _PAGE_CACHEABLE | \ - _PAGE_U_EXECUTE | _PAGE_U_WRITE | _PAGE_U_READ | \ - _PAGE_K_EXECUTE | _PAGE_K_WRITE | _PAGE_K_READ) +#define PTE_BITS_IN_PD0 (_PAGE_GLOBAL | _PAGE_PRESENT) +#define PTE_BITS_RWX (_PAGE_EXECUTE | _PAGE_WRITE | _PAGE_READ) +#define PTE_BITS_NON_RWX_IN_PD1 (PAGE_MASK | _PAGE_CACHEABLE) /************************************************************************** * Mapping of vm_flags (Generic VM) to PTE flags (arch specific) diff --git a/arch/arc/include/asm/prom.h b/arch/arc/include/asm/prom.h deleted file mode 100644 index 692d0d0789a7..000000000000 --- a/arch/arc/include/asm/prom.h +++ /dev/null @@ -1,14 +0,0 @@ -/* - * Copyright (C) 2012 Synopsys, Inc. (www.synopsys.com) - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. 
- */ - -#ifndef _ASM_ARC_PROM_H_ -#define _ASM_ARC_PROM_H_ - -#define HAVE_ARCH_DEVTREE_FIXUPS - -#endif diff --git a/arch/arc/include/asm/ptrace.h b/arch/arc/include/asm/ptrace.h index c9938e7a7dbd..1bfeec2c0558 100644 --- a/arch/arc/include/asm/ptrace.h +++ b/arch/arc/include/asm/ptrace.h @@ -20,27 +20,17 @@ struct pt_regs { /* Real registers */ long bta; /* bta_l1, bta_l2, erbta */ - long lp_start; - long lp_end; - long lp_count; + + long lp_start, lp_end, lp_count; + long status32; /* status32_l1, status32_l2, erstatus */ long ret; /* ilink1, ilink2 or eret */ long blink; long fp; long r26; /* gp */ - long r12; - long r11; - long r10; - long r9; - long r8; - long r7; - long r6; - long r5; - long r4; - long r3; - long r2; - long r1; - long r0; + + long r12, r11, r10, r9, r8, r7, r6, r5, r4, r3, r2, r1, r0; + long sp; /* user/kernel sp depending on where we came from */ long orig_r0; @@ -70,19 +60,7 @@ struct pt_regs { /* Callee saved registers - need to be saved only when you are scheduled out */ struct callee_regs { - long r25; - long r24; - long r23; - long r22; - long r21; - long r20; - long r19; - long r18; - long r17; - long r16; - long r15; - long r14; - long r13; + long r25, r24, r23, r22, r21, r20, r19, r18, r17, r16, r15, r14, r13; }; #define instruction_pointer(regs) ((regs)->ret) diff --git a/arch/arc/include/asm/sections.h b/arch/arc/include/asm/sections.h index 6fc1159dfefe..764f1e3ba752 100644 --- a/arch/arc/include/asm/sections.h +++ b/arch/arc/include/asm/sections.h @@ -11,7 +11,6 @@ #include <asm-generic/sections.h> -extern char _int_vec_base_lds[]; extern char __arc_dccm_base[]; extern char __dtb_start[]; diff --git a/arch/arc/include/asm/setup.h b/arch/arc/include/asm/setup.h index 229e50681497..e10f8cef56a8 100644 --- a/arch/arc/include/asm/setup.h +++ b/arch/arc/include/asm/setup.h @@ -31,7 +31,7 @@ struct cpuinfo_data { extern int root_mountflags, end_mem; extern int running_on_hw; -void __init setup_processor(void); +void setup_processor(void); void __init setup_arch_memory(void); #endif /* __ASMARC_SETUP_H */ diff --git a/arch/arc/include/asm/smp.h b/arch/arc/include/asm/smp.h index c4fb211dcd25..eefc29f08cdb 100644 --- a/arch/arc/include/asm/smp.h +++ b/arch/arc/include/asm/smp.h @@ -30,7 +30,7 @@ extern void arch_send_call_function_ipi_mask(const struct cpumask *mask); * APIs provided by arch SMP code to rest of arch code */ extern void __init smp_init_cpus(void); -extern void __init first_lines_of_secondary(void); +extern void first_lines_of_secondary(void); extern const char *arc_platform_smp_cpuinfo(void); /* diff --git a/arch/arc/include/asm/spinlock.h b/arch/arc/include/asm/spinlock.h index f158197ac5b0..b6a8c2dfbe6e 100644 --- a/arch/arc/include/asm/spinlock.h +++ b/arch/arc/include/asm/spinlock.h @@ -45,7 +45,14 @@ static inline int arch_spin_trylock(arch_spinlock_t *lock) static inline void arch_spin_unlock(arch_spinlock_t *lock) { - lock->slock = __ARCH_SPIN_LOCK_UNLOCKED__; + unsigned int tmp = __ARCH_SPIN_LOCK_UNLOCKED__; + + __asm__ __volatile__( + " ex %0, [%1] \n" + : "+r" (tmp) + : "r"(&(lock->slock)) + : "memory"); + smp_mb(); } diff --git a/arch/arc/include/asm/spinlock_types.h b/arch/arc/include/asm/spinlock_types.h index 8276bfd61704..662627ced4f2 100644 --- a/arch/arc/include/asm/spinlock_types.h +++ b/arch/arc/include/asm/spinlock_types.h @@ -20,9 +20,9 @@ typedef struct { #define __ARCH_SPIN_LOCK_LOCKED { __ARCH_SPIN_LOCK_LOCKED__ } /* - * Unlocked: 0x01_00_00_00 - * Read lock(s): 0x00_FF_00_00 to say 0x01 - * Write lock: 0x0, but only 
possible if prior value "unlocked" 0x0100_0000 + * Unlocked : 0x0100_0000 + * Read lock(s) : 0x00FF_FFFF to 0x01 (Multiple Readers decrement it) + * Write lock : 0x0, but only if prior value is "unlocked" 0x0100_0000 */ typedef struct { volatile unsigned int counter; diff --git a/arch/arc/include/asm/thread_info.h b/arch/arc/include/asm/thread_info.h index 2d50a4cdd7f3..45be21672011 100644 --- a/arch/arc/include/asm/thread_info.h +++ b/arch/arc/include/asm/thread_info.h @@ -80,8 +80,6 @@ static inline __attribute_const__ struct thread_info *current_thread_info(void) #endif /* !__ASSEMBLY__ */ -#define PREEMPT_ACTIVE 0x10000000 - /* * thread information flags * - these are process state flags that various assembly files may need to diff --git a/arch/arc/include/asm/tlbflush.h b/arch/arc/include/asm/tlbflush.h index b2f9bc7f68c8..71c7b2e4b874 100644 --- a/arch/arc/include/asm/tlbflush.h +++ b/arch/arc/include/asm/tlbflush.h @@ -18,11 +18,18 @@ void local_flush_tlb_kernel_range(unsigned long start, unsigned long end); void local_flush_tlb_range(struct vm_area_struct *vma, unsigned long start, unsigned long end); -/* XXX: Revisit for SMP */ +#ifndef CONFIG_SMP #define flush_tlb_range(vma, s, e) local_flush_tlb_range(vma, s, e) #define flush_tlb_page(vma, page) local_flush_tlb_page(vma, page) #define flush_tlb_kernel_range(s, e) local_flush_tlb_kernel_range(s, e) #define flush_tlb_all() local_flush_tlb_all() #define flush_tlb_mm(mm) local_flush_tlb_mm(mm) - +#else +extern void flush_tlb_range(struct vm_area_struct *vma, unsigned long start, + unsigned long end); +extern void flush_tlb_page(struct vm_area_struct *vma, unsigned long page); +extern void flush_tlb_kernel_range(unsigned long start, unsigned long end); +extern void flush_tlb_all(void); +extern void flush_tlb_mm(struct mm_struct *mm); +#endif /* CONFIG_SMP */ #endif diff --git a/arch/arc/include/asm/uaccess.h b/arch/arc/include/asm/uaccess.h index 32420824375b..30c9baffa96f 100644 --- a/arch/arc/include/asm/uaccess.h +++ b/arch/arc/include/asm/uaccess.h @@ -43,7 +43,7 @@ * Because it essentially checks if buffer end is within limit and @len is * non-ngeative, which implies that buffer start will be within limit too. * - * The reason for rewriting being, for majorit yof cases, @len is generally + * The reason for rewriting being, for majority of cases, @len is generally * compile time constant, causing first sub-expression to be compile time * subsumed. * @@ -53,7 +53,7 @@ * */ #define __user_ok(addr, sz) (((sz) <= TASK_SIZE) && \ - (((addr)+(sz)) <= get_fs())) + ((addr) <= (get_fs() - (sz)))) #define __access_ok(addr, sz) (unlikely(__kernel_ok) || \ likely(__user_ok((addr), (sz)))) diff --git a/arch/arc/include/asm/unaligned.h b/arch/arc/include/asm/unaligned.h index 60702f3751d2..3e5f071bc00c 100644 --- a/arch/arc/include/asm/unaligned.h +++ b/arch/arc/include/asm/unaligned.h @@ -22,7 +22,8 @@ static inline int misaligned_fixup(unsigned long address, struct pt_regs *regs, struct callee_regs *cregs) { - return 0; + /* Not fixed */ + return 1; } #endif |
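The uaccess.h hunk above rewrites the user range check from (addr + sz) <= get_fs() to addr <= get_fs() - sz. Because the first clause already guarantees sz <= TASK_SIZE (which does not exceed the segment limit), the subtraction cannot underflow, whereas the old addition could wrap for a large addr and wrongly accept a kernel address. A small user-space sketch of the difference; the 0x80000000 user/kernel split is an assumption for illustration only:

/* Why addr <= limit - sz is safer than addr + sz <= limit. */
#include <stdint.h>
#include <stdio.h>

#define TASK_SIZE 0x80000000u
#define SEG_LIMIT 0x80000000u	/* stand-in for get_fs() */

static int old_user_ok(uint32_t addr, uint32_t sz)
{
	return (sz <= TASK_SIZE) && (addr + sz <= SEG_LIMIT);	/* addr+sz may wrap */
}

static int new_user_ok(uint32_t addr, uint32_t sz)
{
	return (sz <= TASK_SIZE) && (addr <= SEG_LIMIT - sz);	/* cannot wrap */
}

int main(void)
{
	uint32_t addr = 0xfffff000u, sz = 0x2000u;	/* addr + sz wraps to 0x1000 */

	/* prints old: 1  new: 0 - the old form accepts a kernel address */
	printf("old: %d  new: %d\n", old_user_ok(addr, sz), new_user_ok(addr, sz));
	return 0;
}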