This documentation is automatically generated by online-judge-tools/verification-helper
View the Project on GitHub tko919/library
#define PROBLEM \ "https://judge.yosupo.jp/problem/multivariate_convolution_cyclic" #include "Template/template.hpp" #include "Utility/fastio.hpp" #include "Math/dynamic.hpp" #include "Convolution/arbitrary.hpp" #include "FPS/arbitraryfps.hpp" template <> vector<Fp> Poly<Fp>::mult(const vector<Fp> &a, const vector<Fp> &b) const { return ArbitraryMult<Fp>(a, b); } #include "Convolution/multivariatecyclic.hpp" int main() { int p, k; read(p, k); Fp::set_mod(p); vector<int> a(k); read(a); int n = 1; for (auto &x : a) n *= x; vector<Fp> f(n), g(n); rep(i, 0, n) read(f[i].v); rep(i, 0, n) read(g[i].v); auto ret = MultivariateCyclic(f, g, a); rep(i, 0, n) print(ret[i].v); return 0; }
// Bundled single-file expansion of
// Verify/LC_multivariate_convolution_cyclic.test.cpp.
// The `#line` directives are emitted by the bundler and only affect
// diagnostics. Line structure matters here: preprocessor directives and `//`
// comments both extend to the end of the physical line, so each one is kept
// on a line of its own.
#line 1 "Verify/LC_multivariate_convolution_cyclic.test.cpp"
#define PROBLEM \
    "https://judge.yosupo.jp/problem/multivariate_convolution_cyclic"

#line 1 "Template/template.hpp"
#include <bits/stdc++.h>
using namespace std;

// Loop / container shorthands used throughout the library.
// rep iterates i over [a, b); rrep iterates i from b-1 down to a.
#define rep(i, a, b) for (int i = (int)(a); i < (int)(b); i++)
#define rrep(i, a, b) for (int i = (int)(b-1); i >= (int)(a); i--)
#define ALL(v) (v).begin(), (v).end()
#define UNIQUE(v) sort(ALL(v)), (v).erase(unique(ALL(v)), (v).end())
#define SZ(v) (int)v.size()
#define MIN(v) *min_element(ALL(v))
#define MAX(v) *max_element(ALL(v))
#define LB(v, x) int(lower_bound(ALL(v), (x)) - (v).begin())
#define UB(v, x) int(upper_bound(ALL(v), (x)) - (v).begin())

using uint = unsigned int;
using ll = long long int;
using ull = unsigned long long;
using i128 = __int128_t;
using u128 = __uint128_t;
const int inf = 0x3fffffff;
const ll INF = 0x1fffffffffffffff;

// Sets a = b when b is larger; returns whether a changed.
template <typename T> inline bool chmax(T &a, T b) {
    if (a < b) {
        a = b;
        return 1;
    }
    return 0;
}
// Sets a = b when b is smaller; returns whether a changed.
template <typename T> inline bool chmin(T &a, T b) {
    if (a > b) {
        a = b;
        return 1;
    }
    return 0;
}
// Integer division x / y rounded up; asserts y != 0 and normalizes y > 0.
template <typename T, typename U> T ceil(T x, U y) {
    assert(y != 0);
    if (y < 0) x = -x, y = -y;
    return (x > 0 ? (x + y - 1) / y : x / y);
}
// Integer division x / y rounded down; asserts y != 0 and normalizes y > 0.
template <typename T, typename U> T floor(T x, U y) {
    assert(y != 0);
    if (y < 0) x = -x, y = -y;
    return (x > 0 ? x / y : (x - y + 1) / y);
}
// Bit helpers built on the 64-bit builtins; topbit/lowbit return -1 for 0.
template <typename T> int popcnt(T x) { return __builtin_popcountll(x); }
template <typename T> int topbit(T x) {
    return (x == 0 ? -1 : 63 - __builtin_clzll(x));
}
template <typename T> int lowbit(T x) {
    return (x == 0 ? -1 : __builtin_ctzll(x));
}

// show(a, b, ...) prints "name:value" pairs to cerr when LOCAL is defined and
// expands to the expression `true` otherwise.
#ifdef LOCAL
#define show(...) _show(0, #__VA_ARGS__, __VA_ARGS__)
#else
#define show(...) true
#endif
template <typename T> void _show(int i, T name) { cerr << '\n'; }
template <typename T1, typename T2, typename... T3>
void _show(int i, const T1 &a, const T2 &b, const T3 &...c) {
    // a is the stringified argument list; print it up to the next comma.
    for (; a[i] != ',' && a[i] != '\0'; i++) cerr << a[i];
    cerr << ":" << b << " ";
    _show(i + 1, a, c...);
}
template <class T, class U>
ostream &operator<<(ostream &os, const pair<T, U> &p) {
    os << "P(" << p.first << ", " << p.second << ")";
    return os;
}
template <typename T, template <class> class C>
ostream &operator<<(ostream &os, const C<T> &v) {
    os << "[";
    for (auto d : v) os << d << ", ";
    os << "]";
    return os;
}

#line 2 "Utility/fastio.hpp"
#include <unistd.h>

// Buffered stdin/stdout built on fread/fwrite with fixed-size buffers.
// NOTE: inside this namespace `SZ` is a variable; the function-like macro
// SZ(v) only expands when followed by '('.
namespace fastio {
static constexpr uint32_t SZ = 1 << 17;
char ibuf[SZ];
char obuf[SZ];
char out[100];
// pointer of ibuf, obuf
uint32_t pil = 0, pir = 0, por = 0;

// Table of the 4-character decimal renderings of 0..9999 (zero padded).
struct Pre {
    char num[10000][4];
    constexpr Pre() : num() {
        for (int i = 0; i < 10000; i++) {
            int n = i;
            for (int j = 3; j >= 0; j--) {
                num[i][j] = n % 10 | '0';
                n /= 10;
            }
        }
    }
} constexpr pre;

// Moves the unread tail of ibuf to the front and refills the rest from stdin.
// When the buffer is not full afterwards a '\n' sentinel is appended.
inline void load() {
    memmove(ibuf, ibuf + pil, pir - pil);
    pir = pir - pil + fread(ibuf + pir - pil, 1, SZ - pir + pil, stdin);
    pil = 0;
    if (pir < SZ) ibuf[pir++] = '\n';
}

// Writes the pending output bytes to stdout and resets the write cursor.
inline void flush() {
    fwrite(obuf, 1, por, stdout);
    por = 0;
}

// Reads the next non-whitespace character.
void rd(char &c) {
    do {
        if (pil + 1 > pir) load();
        c = ibuf[pil++];
    } while (isspace(c));
}
// Reads the next whitespace-delimited token.
void rd(string &x) {
    x.clear();
    char c;
    do {
        if (pil + 1 > pir) load();
        c = ibuf[pil++];
    } while (isspace(c));
    do {
        x += c;
        if (pil == pir) load();
        c = ibuf[pil++];
    } while (!isspace(c));
}
// Reads a token and converts it with stod.
template <typename T> void rd_real(T &x) {
    string s;
    rd(s);
    x = stod(s);
}
// Parses a decimal integer directly from the buffer. load() is called when
// fewer than 100 bytes are buffered; after that the parser indexes ibuf
// without further bounds checks.
template <typename T> void rd_integer(T &x) {
    if (pil + 100 > pir) load();
    char c;
    do c = ibuf[pil++];
    while (c < '-');  // skip everything that sorts before '-'
    bool minus = 0;
    if constexpr (is_signed<T>::value || is_same_v<T, i128>) {
        if (c == '-') { minus = 1, c = ibuf[pil++]; }
    }
    x = 0;
    while ('0' <= c) { x = x * 10 + (c & 15), c = ibuf[pil++]; }
    if constexpr (is_signed<T>::value || is_same_v<T, i128>) {
        if (minus) x = -x;
    }
}

void rd(int &x) { rd_integer(x); }
void rd(ll &x) { rd_integer(x); }
void rd(i128 &x) { rd_integer(x); }
void rd(uint &x) { rd_integer(x); }
void rd(ull &x) { rd_integer(x); }
void rd(u128 &x) { rd_integer(x); }
void rd(double &x) { rd_real(x); }
void rd(long double &x) { rd_real(x); }

template <class T, class U> void rd(pair<T, U> &p) {
    return rd(p.first), rd(p.second);
}
template <size_t N = 0, typename T> void rd_tuple(T &t) {
    if constexpr (N < std::tuple_size<T>::value) {
        auto &x = std::get<N>(t);
        rd(x);
        rd_tuple<N + 1>(t);
    }
}
template <class... T> void rd(tuple<T...> &tpl) { rd_tuple(tpl); }

template <size_t N = 0, typename T> void rd(array<T, N> &x) {
    for (auto &d : x) rd(d);
}
template <class T> void rd(vector<T> &x) {
    for (auto &d : x) rd(d);
}

// read(a, b, ...) reads each argument in order via the rd overloads.
void read() {}
template <class H, class... T> void read(H &h, T &...t) {
    rd(h), read(t...);
}

void wt(const char c) {
    if (por == SZ) flush();
    obuf[por++] = c;
}
void wt(const string s) {
    for (char c : s) wt(c);
}
void wt(const char *s) {
    size_t len = strlen(s);
    for (size_t i = 0; i < len; i++) wt(s[i]);
}

// Writes an integer in decimal. Groups of four low digits are staged in
// `out` (filled from the back) via the `pre` table; the leading 1-4 digits
// are written directly, then the staged groups are appended.
template <typename T> void wt_integer(T x) {
    if (por > SZ - 100) flush();
    if (x < 0) { obuf[por++] = '-', x = -x; }
    int outi;
    for (outi = 96; x >= 10000; outi -= 4) {
        memcpy(out + outi, pre.num[x % 10000], 4);
        x /= 10000;
    }
    if (x >= 1000) {
        memcpy(obuf + por, pre.num[x], 4);
        por += 4;
    } else if (x >= 100) {
        memcpy(obuf + por, pre.num[x] + 1, 3);
        por += 3;
    } else if (x >= 10) {
        int q = (x * 103) >> 10;
        obuf[por] = q | '0';
        obuf[por + 1] = (x - q * 10) | '0';
        por += 2;
    } else
        obuf[por++] = x | '0';
    memcpy(obuf + por, out + outi + 4, 96 - outi);
    por += 96 - outi;
}

// Writes a floating point value in fixed notation with 15 decimals.
template <typename T> void wt_real(T x) {
    ostringstream oss;
    oss << fixed << setprecision(15) << double(x);
    string s = oss.str();
    wt(s);
}

void wt(int x) { wt_integer(x); }
void wt(ll x) { wt_integer(x); }
void wt(i128 x) { wt_integer(x); }
void wt(uint x) { wt_integer(x); }
void wt(ull x) { wt_integer(x); }
void wt(u128 x) { wt_integer(x); }
void wt(double x) { wt_real(x); }
void wt(long double x) { wt_real(x); }

template <class T, class U> void wt(const pair<T, U> val) {
    wt(val.first);
    wt(' ');
    wt(val.second);
}
template <size_t N = 0, typename T> void wt_tuple(const T t) {
    if constexpr (N < std::tuple_size<T>::value) {
        if constexpr (N > 0) { wt(' '); }
        const auto x = std::get<N>(t);
        wt(x);
        wt_tuple<N + 1>(t);
    }
}
template <class... T> void wt(tuple<T...> tpl) { wt_tuple(tpl); }
template <class T, size_t S> void wt(const array<T, S> val) {
    auto n = val.size();
    for (size_t i = 0; i < n; i++) {
        if (i) wt(' ');
        wt(val[i]);
    }
}
template <class T> void wt(const vector<T> val) {
    auto n = val.size();
    for (size_t i = 0; i < n; i++) {
        if (i) wt(' ');
        wt(val[i]);
    }
}

// print(a, b, ...) writes the arguments separated by single spaces and ends
// the line with '\n'.
void print() { wt('\n'); }
template <class Head, class... Tail> void print(Head &&head, Tail &&...tail) {
    wt(head);
    if (sizeof...(Tail)) wt(' ');
    print(forward<Tail>(tail)...);
}

// Registered with the destructor attribute so buffered output is flushed.
void __attribute__((destructor)) _d() { flush(); }
} // namespace fastio

using fastio::flush;
using fastio::print;
using fastio::read;

// Canned two-way answers; each prints the first string when i is true.
inline void first(bool i = true) { print(i ? "first" : "second"); }
inline void Alice(bool i = true) { print(i ? "Alice" : "Bob"); }
inline void yes(bool i = true) { print(i ? "yes" : "no"); }
inline void Yes(bool i = true) { print(i ? "Yes" : "No"); }
inline void No() { print("No"); }
inline void YES(bool i = true) { print(i ? "YES" : "NO"); }
inline void NO() { print("NO"); }
inline void Yay(bool i = true) { print(i ? "Yay!" : ":("); }
inline void Possible(bool i = true) { print(i ? "Possible" : "Impossible"); }
inline void POSSIBLE(bool i = true) { print(i ? "POSSIBLE" : "IMPOSSIBLE"); }

/**
 * @brief Fast IO
 */
#line 6 "Verify/LC_multivariate_convolution_cyclic.test.cpp"

#line 2 "Math/fastdiv.hpp"
// Division / remainder by a fixed divisor m using a precomputed multiplier:
// s = __lg(m - 1), x = ceil(2^(s + 64) / m), quotient = (n * x >> s) >> 64.
struct FastDiv {
    using u64 = uint64_t;
    using u128 = __uint128_t;
    constexpr FastDiv() : m(), s(), x() {}
    constexpr FastDiv(int _m)
        : m(_m), s(__lg(m - 1)), x(((u128(1) << (s + 64)) + m - 1) / m) {}
    constexpr int get() { return m; }
    constexpr friend u64 operator/(u64 n, const FastDiv &d) {
        return (u128(n) * d.x >> d.s) >> 64;
    }
    constexpr friend int operator%(u64 n, const FastDiv &d) {
        return n - n / d * d.m;
    }
    constexpr pair<u64, int> divmod(u64 n) const {
        u64 q = n / (*this);
        return {q, n - q * m};
    }
    int m, s;
    u64 x;
};

/**
 * @brief Fast Division
 */
#line 3 "Math/dynamic.hpp"
// Modular integer whose modulus is chosen at run time via set_mod(); the
// modulus is shared by all Fp values (static FastDiv), default 998244353.
struct Fp {
    using u64 = uint64_t;
    int v;  // representative; the arithmetic operators keep it in [0, mod)
    static int get_mod() { return _getmod(); }
    static void set_mod(int _m) { bar = FastDiv(_m); }
    // Multiplicative inverse via the extended Euclidean loop.
    Fp inv() const {
        int tmp, a = v, b = get_mod(), x = 1, y = 0;
        while (b) {
            tmp = a / b, a -= tmp * b;
            swap(a, b);
            x -= tmp * y;
            swap(x, y);
        }
        if (x < 0) { x += get_mod(); }
        return x;
    }
    Fp() : v(0) {}
    Fp(ll x) {
        v = x % get_mod();
        if (v < 0) v += get_mod();
    }
    Fp operator-() const { return Fp() - *this; }
    // Binary exponentiation; asserts t >= 0.
    Fp pow(ll t) {
        assert(t >= 0);
        Fp res = 1, b = *this;
        while (t) {
            if (t & 1) res *= b;
            b *= b;
            t >>= 1;
        }
        return res;
    }
    Fp &operator+=(const Fp &x) {
        v += x.v;
        if (v >= get_mod()) v -= get_mod();
        return *this;
    }
    Fp &operator-=(const Fp &x) {
        v += get_mod() - x.v;
        if (v >= get_mod()) v -= get_mod();
        return *this;
    }
    Fp &operator*=(const Fp &x) {
        v = (u64(v) * x.v) % bar;
        return *this;
    }
    Fp &operator/=(const Fp &x) {
        (*this) *= x.inv();
        return *this;
    }
    Fp operator+(const Fp &x) const { return Fp(*this) += x; }
    Fp operator-(const Fp &x) const { return Fp(*this) -= x; }
    Fp operator*(const Fp &x) const { return Fp(*this) *= x; }
    Fp operator/(const Fp &x) const { return Fp(*this) /= x; }
    bool operator==(const Fp &x) const { return v == x.v; }
    bool operator!=(const Fp &x) const { return v != x.v; }

  private:
    static FastDiv bar;
    static int _getmod() { return bar.get(); }
};
FastDiv Fp::bar(998244353);

/**
 * @brief Dynamic Modint
 */
#line 2 "Convolution/ntt.hpp"
// Number theoretic transform over T. T must expose a compile-time
// get_mod(), since rank2 is a constant expression sizing the root tables.
template <typename T> struct NTT {
    static constexpr int rank2 = __builtin_ctzll(T::get_mod() - 1);
    std::array<T, rank2 + 1> root;  // root[i]^(2^i) == 1
    std::array<T, rank2 + 1> iroot; // root[i] * iroot[i] == 1

    std::array<T, std::max(0, rank2 - 2 + 1)> rate2;
    std::array<T, std::max(0, rank2 - 2 + 1)> irate2;

    std::array<T, std::max(0, rank2 - 3 + 1)> rate3;
    std::array<T, std::max(0, rank2 - 3 + 1)> irate3;

    NTT() {
        // Smallest g >= 2 with g^((mod-1)/2) != 1.
        T g = 2;
        while (g.pow((T::get_mod() - 1) >> 1) == 1) { g += 1; }
        root[rank2] = g.pow((T::get_mod() - 1) >> rank2);
        iroot[rank2] = root[rank2].inv();
        for (int i = rank2 - 1; i >= 0; i--) {
            root[i] = root[i + 1] * root[i + 1];
            iroot[i] = iroot[i + 1] * iroot[i + 1];
        }
        {
            T prod = 1, iprod = 1;
            for (int i = 0; i <= rank2 - 2; i++) {
                rate2[i] = root[i + 2] * prod;
                irate2[i] = iroot[i + 2] * iprod;
                prod *= iroot[i + 2];
                iprod *= root[i + 2];
            }
        }
        {
            T prod = 1, iprod = 1;
            for (int i = 0; i <= rank2 - 3; i++) {
                rate3[i] = root[i + 3] * prod;
                irate3[i] = iroot[i + 3] * iprod;
                prod *= iroot[i + 3];
                iprod *= root[i + 3];
            }
        }
    }

    // In-place transform of a. type == 0 uses the root/rate tables; type != 0
    // uses the inverse tables and finally multiplies every entry by 1 / n.
    void ntt(std::vector<T> &a, bool type = 0) {
        int n = int(a.size());
        int h = __builtin_ctzll((unsigned int)n);
        a.resize(1 << h);
        if (type) {
            int len = h; // a[i, i+(n>>len), i+2*(n>>len), ..] is transformed
            while (len) {
                if (len == 1) {
                    int p = 1 << (h - len);
                    T irot = 1;
                    for (int s = 0; s < (1 << (len - 1)); s++) {
                        int offset = s << (h - len + 1);
                        for (int i = 0; i < p; i++) {
                            auto l = a[i + offset];
                            auto r = a[i + offset + p];
                            a[i + offset] = l + r;
                            a[i + offset + p] =
                                (unsigned long long)(T::get_mod() + l.v - r.v) *
                                irot.v;
                            ;
                        }
                        if (s + 1 != (1 << (len - 1)))
                            irot *= irate2[__builtin_ctzll(~(unsigned int)(s))];
                    }
                    len--;
                } else {
                    // 4-base
                    int p = 1 << (h - len);
                    T irot = 1, iimag = iroot[2];
                    for (int s = 0; s < (1 << (len - 2)); s++) {
                        T irot2 = irot * irot;
                        T irot3 = irot2 * irot;
                        int offset = s << (h - len + 2);
                        for (int i = 0; i < p; i++) {
                            auto a0 = 1ULL * a[i + offset + 0 * p].v;
                            auto a1 = 1ULL * a[i + offset + 1 * p].v;
                            auto a2 = 1ULL * a[i + offset + 2 * p].v;
                            auto a3 = 1ULL * a[i + offset + 3 * p].v;

                            auto a2na3iimag =
                                1ULL *
                                T((T::get_mod() + a2 - a3) * iimag.v).v;

                            a[i + offset] = a0 + a1 + a2 + a3;
                            a[i + offset + 1 * p] =
                                (a0 + (T::get_mod() - a1) + a2na3iimag) *
                                irot.v;
                            a[i + offset + 2 * p] =
                                (a0 + a1 + (T::get_mod() - a2) +
                                 (T::get_mod() - a3)) *
                                irot2.v;
                            a[i + offset + 3 * p] =
                                (a0 + (T::get_mod() - a1) +
                                 (T::get_mod() - a2na3iimag)) *
                                irot3.v;
                        }
                        if (s + 1 != (1 << (len - 2)))
                            irot *= irate3[__builtin_ctzll(~(unsigned int)(s))];
                    }
                    len -= 2;
                }
            }
            T e = T(n).inv();
            for (auto &x : a) x *= e;
        } else {
            int len = 0; // a[i, i+(n>>len), i+2*(n>>len), ..] is transformed
            while (len < h) {
                if (h - len == 1) {
                    int p = 1 << (h - len - 1);
                    T rot = 1;
                    for (int s = 0; s < (1 << len); s++) {
                        int offset = s << (h - len);
                        for (int i = 0; i < p; i++) {
                            auto l = a[i + offset];
                            auto r = a[i + offset + p] * rot;
                            a[i + offset] = l + r;
                            a[i + offset + p] = l - r;
                        }
                        if (s + 1 != (1 << len))
                            rot *= rate2[__builtin_ctzll(~(unsigned int)(s))];
                    }
                    len++;
                } else {
                    // 4-base
                    int p = 1 << (h - len - 2);
                    T rot = 1, imag = root[2];
                    for (int s = 0; s < (1 << len); s++) {
                        T rot2 = rot * rot;
                        T rot3 = rot2 * rot;
                        int offset = s << (h - len);
                        for (int i = 0; i < p; i++) {
                            auto mod2 = 1ULL * T::get_mod() * T::get_mod();
                            auto a0 = 1ULL * a[i + offset].v;
                            auto a1 = 1ULL * a[i + offset + p].v * rot.v;
                            auto a2 = 1ULL * a[i + offset + 2 * p].v * rot2.v;
                            auto a3 = 1ULL * a[i + offset + 3 * p].v * rot3.v;
                            auto a1na3imag =
                                1ULL * T(a1 + mod2 - a3).v * imag.v;
                            auto na2 = mod2 - a2;
                            a[i + offset] = a0 + a2 + a1 + a3;
                            a[i + offset + 1 * p] =
                                a0 + a2 + (2 * mod2 - (a1 + a3));
                            a[i + offset + 2 * p] = a0 + na2 + a1na3imag;
                            a[i + offset + 3 * p] =
                                a0 + na2 + (mod2 - a1na3imag);
                        }
                        if (s + 1 != (1 << len))
                            rot *= rate3[__builtin_ctzll(~(unsigned int)(s))];
                    }
                    len += 2;
                }
            }
        }
    }

    // Linear convolution of a and b (size |a| + |b| - 1, empty if either is
    // empty). Uses the schoolbook product when either side has <= 30 terms.
    vector<T> mult(const vector<T> &a, const vector<T> &b) {
        if (a.empty() or b.empty()) return vector<T>();
        int as = a.size(), bs = b.size();
        int n = as + bs - 1;
        if (as <= 30 or bs <= 30) {
            if (as > 30) return mult(b, a);
            vector<T> res(n);
            rep(i, 0, as) rep(j, 0, bs) res[i + j] += a[i] * b[j];
            return res;
        }
        int m = 1;
        while (m < n) m <<= 1;
        vector<T> res(m);
        rep(i, 0, as) res[i] = a[i];
        ntt(res);
        if (a == b) rep(i, 0, m) res[i] *= res[i];
        else {
            vector<T> c(m);
            rep(i, 0, bs) c[i] = b[i];
            ntt(c);
            rep(i, 0, m) res[i] *= c[i];
        }
        ntt(res, 1);
        res.resize(n);
        return res;
    }
};

/**
 * @brief Number Theoretic Transform
 */
#line 2 "Math/modint.hpp"
// Modular integer with a compile-time modulus.
template <unsigned mod = 1000000007> struct fp {
    unsigned v;
    static constexpr int get_mod() { return mod; }
    // Multiplicative inverse via the extended Euclidean loop; asserts v != 0.
    constexpr unsigned inv() const {
        assert(v != 0);
        int x = v, y = mod, p = 1, q = 0, t = 0, tmp = 0;
        while (y > 0) {
            t = x / y;
            x -= t * y, p -= t * q;
            tmp = x, x = y, y = tmp;
            tmp = p, p = q, q = tmp;
        }
        if (p < 0) p += mod;
        return p;
    }
    constexpr fp(ll x = 0)
        : v(x >= 0 ? x % mod : (mod - (-x) % mod) % mod) {}
    fp operator-() const { return fp() - *this; }
    fp pow(ull t) {
        fp res = 1, b = *this;
        while (t) {
            if (t & 1) res *= b;
            b *= b;
            t >>= 1;
        }
        return res;
    }
    fp &operator+=(const fp &x) {
        if ((v += x.v) >= mod) v -= mod;
        return *this;
    }
    fp &operator-=(const fp &x) {
        if ((v += mod - x.v) >= mod) v -= mod;
        return *this;
    }
    fp &operator*=(const fp &x) {
        v = ull(v) * x.v % mod;
        return *this;
    }
    fp &operator/=(const fp &x) {
        v = ull(v) * x.inv() % mod;
        return *this;
    }
    fp operator+(const fp &x) const { return fp(*this) += x; }
    fp operator-(const fp &x) const { return fp(*this) -= x; }
    fp operator*(const fp &x) const { return fp(*this) *= x; }
    fp operator/(const fp &x) const { return fp(*this) /= x; }
    bool operator==(const fp &x) const { return v == x.v; }
    bool operator!=(const fp &x) const { return v != x.v; }
    friend istream &operator>>(istream &is, fp &x) { return is >> x.v; }
    friend ostream &operator<<(ostream &os, const fp &x) { return os << x.v; }
};

template <unsigned mod> void rd(fp<mod> &x) { fastio::rd(x.v); }
template <unsigned mod> void wt(fp<mod> x) { fastio::wt(x.v); }

// Memoized modular inverse of n. The modulus is captured once in a
// function-local static, so later modulus changes of T are not observed.
template <typename T> T Inv(ll n) {
    static const int md = T::get_mod();
    static vector<T> buf({0, 1});
    assert(n > 0);
    n %= md;
    while (SZ(buf) <= n) {
        int k = SZ(buf), q = (md + k - 1) / k;
        buf.push_back(buf[k * q - md] * q);
    }
    return buf[n];
}
// Memoized n! (inv == 0) or its inverse (inv != 0); asserts 0 <= n < mod.
template <typename T> T Fact(ll n, bool inv = 0) {
    static const int md = T::get_mod();
    static vector<T> buf({1, 1}), ibuf({1, 1});
    assert(n >= 0 and n < md);
    while (SZ(buf) <= n) {
        buf.push_back(buf.back() * SZ(buf));
        ibuf.push_back(ibuf.back() * Inv<T>(SZ(ibuf)));
    }
    return inv ? ibuf[n] : buf[n];
}
// Permutation / combination counts built from Fact; `inv` selects the
// reciprocal of the value. Out-of-range arguments give 0.
template <typename T> T nPr(int n, int r, bool inv = 0) {
    if (n < 0 || n < r || r < 0) return 0;
    return Fact<T>(n, inv) * Fact<T>(n - r, inv ^ 1);
}
template <typename T> T nCr(int n, int r, bool inv = 0) {
    if (n < 0 || n < r || r < 0) return 0;
    return Fact<T>(n, inv) * Fact<T>(r, inv ^ 1) * Fact<T>(n - r, inv ^ 1);
}
template <typename T> T nHr(int n, int r, bool inv = 0) {
    return nCr<T>(n + r - 1, r, inv);
}

/**
 * @brief Modint
 */
#line 4 "Convolution/arbitrary.hpp"
// Three fixed NTT moduli and the constants used to recombine their residues.
using M1 = fp<1045430273>;
using M2 = fp<1051721729>;
using M3 = fp<1053818881>;

NTT<M1> N1;
NTT<M2> N2;
NTT<M3> N3;

constexpr int r_12 = M2(M1::get_mod()).inv();
constexpr int r_13 = M3(M1::get_mod()).inv();
constexpr int r_23 = M3(M2::get_mod()).inv();
constexpr int r_1323 = M3(ll(r_13) * r_23).v;
constexpr ll w1 = M1::get_mod();
constexpr ll w2 = ll(w1) * M2::get_mod();

// Convolution of integer sequences a and b with the result reduced in T.
// Small inputs (min size <= 60) use the schoolbook product in T directly;
// otherwise the product is computed modulo M1, M2, M3 and each coefficient is
// rebuilt as p + q * w1 + r * w2 evaluated in T.
template <typename T>
vector<T> ArbitraryMult(const vector<int> &a, const vector<int> &b) {
    if (a.empty() or b.empty()) return vector<T>();
    int n = a.size() + b.size() - 1;
    vector<T> res(n);
    if (min(a.size(), b.size()) <= 60) {
        rep(i, 0, a.size()) rep(j, 0, b.size()) res[i + j] += T(a[i]) * b[j];
        return res;
    }
    vector<int> vals[3];
    vector<M1> a1(ALL(a)), b1(ALL(b)), c1 = N1.mult(a1, b1);
    vector<M2> a2(ALL(a)), b2(ALL(b)), c2 = N2.mult(a2, b2);
    vector<M3> a3(ALL(a)), b3(ALL(b)), c3 = N3.mult(a3, b3);
    for (M1 x : c1) vals[0].push_back(x.v);
    for (M2 x : c2) vals[1].push_back(x.v);
    for (M3 x : c3) vals[2].push_back(x.v);
    rep(i, 0, n) {
        ll p = vals[0][i];
        ll q = (vals[1][i] + M2::get_mod() - p) * r_12 % M2::get_mod();
        ll r = ((vals[2][i] + M3::get_mod() - p) * r_1323 +
                (M3::get_mod() - q) * r_23) %
               M3::get_mod();
        res[i] = (T(r) * w2 + q * w1 + p);
    }
    return res;
}

// Overload taking modular values: forwards their raw representatives `v`.
template <typename T>
vector<T> ArbitraryMult(const vector<T> &a, const vector<T> &b) {
    vector<int> A, B;
    for (auto &x : a) A.push_back(x.v);
    for (auto &x : b) B.push_back(x.v);
    return ArbitraryMult<T>(A, B);
}

/**
 * @brief Arbitrary Mod Convolution
 */
#line 2 "FPS/arbitraryfps.hpp"
// Polynomial / truncated power series stored as a coefficient vector
// (index i holds the coefficient of x^i). Multiplication is delegated to
// mult(), which is only declared here and must be defined per T by the user.
template <typename T> struct Poly : vector<T> {
    Poly(int n = 0) { this->assign(n, T()); }
    Poly(const initializer_list<T> f) : vector<T>::vector(f) {}
    Poly(const vector<T> &f) { this->assign(ALL(f)); }
    int deg() const { return this->size() - 1; }
    // Horner evaluation at x.
    T eval(const T &x) {
        T res;
        for (int i = this->size() - 1; i >= 0; i--)
            res *= x, res += this->at(i);
        return res;
    }
    Poly rev() const {
        Poly res = *this;
        reverse(ALL(res));
        return res;
    }
    // Drops trailing zero coefficients.
    void shrink() {
        while (!this->empty() and this->back() == 0) this->pop_back();
    }
    // Removes the lowest sz coefficients (division by x^sz).
    Poly operator>>(ll sz) const {
        if ((int)this->size() <= sz) return {};
        Poly ret(*this);
        ret.erase(ret.begin(), ret.begin() + sz);
        return ret;
    }
    // Prepends sz zero coefficients (multiplication by x^sz).
    Poly operator<<(ll sz) const {
        Poly ret(*this);
        ret.insert(ret.begin(), sz, T(0));
        return ret;
    }
    // Series inverse truncated to size(); asserts a non-zero constant term.
    // Doubles the precision each round: res <- 2 * res - res^2 * h.
    Poly inv() const {
        assert(this->front() != 0);
        const int n = this->size();
        Poly res(1);
        res.front() = T(1) / this->front();
        for (int k = 1; k < n; k <<= 1) {
            Poly g = res, h = *this;
            h.resize(k * 2);
            res.resize(k * 2);
            g = (g.square() * h);
            g.resize(k * 2);
            rep(i, k, min(k * 2, n)) res[i] -= g[i];
        }
        res.resize(n);
        return res;
    }
    Poly square() const { return Poly(mult(*this, *this)); }
    Poly operator-() const { return Poly() - *this; }
    Poly operator+(const Poly &g) const { return Poly(*this) += g; }
    Poly operator+(const T &g) const { return Poly(*this) += g; }
    Poly operator-(const Poly &g) const { return Poly(*this) -= g; }
    Poly operator-(const T &g) const { return Poly(*this) -= g; }
    Poly operator*(const Poly &g) const { return Poly(*this) *= g; }
    Poly operator*(const T &g) const { return Poly(*this) *= g; }
    Poly operator/(const Poly &g) const { return Poly(*this) /= g; }
    Poly operator%(const Poly &g) const { return Poly(*this) %= g; }
    // Quotient and (shrunk) remainder of polynomial division by g.
    pair<Poly, Poly> divmod(const Poly &g) const {
        Poly q = *this / g, r = *this - g * q;
        r.shrink();
        return {q, r};
    }
    Poly &operator+=(const Poly &g) {
        if (g.size() > this->size()) this->resize(g.size());
        rep(i, 0, g.size()) { (*this)[i] += g[i]; }
        return *this;
    }
    Poly &operator+=(const T &g) {
        if (this->empty()) this->push_back(0);
        (*this)[0] += g;
        return *this;
    }
    Poly &operator-=(const Poly &g) {
        if (g.size() > this->size()) this->resize(g.size());
        rep(i, 0, g.size()) { (*this)[i] -= g[i]; }
        return *this;
    }
    Poly &operator-=(const T &g) {
        if (this->empty()) this->push_back(0);
        (*this)[0] -= g;
        return *this;
    }
    Poly &operator*=(const Poly &g) {
        *this = mult(*this, g);
        return *this;
    }
    Poly &operator*=(const T &g) {
        rep(i, 0, this->size())(*this)[i] *= g;
        return *this;
    }
    // Polynomial quotient: reverse both operands, multiply by the series
    // inverse of the reversed divisor, truncate and reverse back.
    Poly &operator/=(const Poly &g) {
        if (g.size() > this->size()) {
            this->clear();
            return *this;
        }
        Poly g2 = g;
        reverse(ALL(*this));
        reverse(ALL(g2));
        int n = this->size() - g2.size() + 1;
        this->resize(n);
        g2.resize(n);
        *this *= g2.inv();
        this->resize(n);
        reverse(ALL(*this));
        shrink();
        return *this;
    }
    Poly &operator%=(const Poly &g) {
        *this -= *this / g * g;
        shrink();
        return *this;
    }
    // Formal derivative.
    Poly diff() const {
        Poly res(this->size() - 1);
        rep(i, 0, res.size()) res[i] = (*this)[i + 1] * (i + 1);
        return res;
    }
    // Formal integral with zero constant term.
    Poly inte() const {
        Poly res(this->size() + 1);
        for (int i = res.size() - 1; i; i--) res[i] = (*this)[i - 1] / i;
        return res;
    }
    // Series logarithm; asserts constant term 1.
    Poly log() const {
        assert(this->front() == 1);
        const int n = this->size();
        Poly res = diff() * inv();
        res = res.inte();
        res.resize(n);
        return res;
    }
    // Series exponential; asserts constant term 0.
    Poly exp() const {
        assert(this->front() == 0);
        const int n = this->size();
        Poly res(1), g(1);
        res.front() = g.front() = 1;
        for (int k = 1; k < n; k <<= 1) {
            g = (g + g - g.square() * res);
            g.resize(k);
            Poly q = *this;
            q.resize(k);
            q = q.diff();
            Poly w = (q + g * (res.diff() - res * q)), t = *this;
            w.resize(k * 2 - 1);
            t.resize(k * 2);
            res = (res + res * (t - w.inte()));
            res.resize(k * 2);
        }
        res.resize(n);
        return res;
    }
    // NOTE(review): looks like the Taylor shift f(x) -> f(x + c), computed
    // with factorial-weighted coefficients - confirm against callers.
    Poly shift(const int &c) const {
        const int n = this->size();
        Poly res = *this, g(n);
        g[0] = 1;
        rep(i, 1, n) g[i] = g[i - 1] * c / i;
        vector<T> fact(n, 1);
        rep(i, 0, n) {
            if (i) fact[i] = fact[i - 1] * i;
            res[i] *= fact[i];
        }
        res = res.rev();
        res *= g;
        res.resize(n);
        res = res.rev();
        rep(i, 0, n) res[i] /= fact[i];
        return res;
    }
    // t-th power truncated to size(): factors out x^k and the leading
    // coefficient, then uses exp(t * log(.)).
    Poly pow(ll t) {
        if (t == 0) {
            Poly res(this->size());
            res[0] = 1;
            return res;
        }
        int n = this->size(), k = 0;
        while (k < n and (*this)[k] == 0) k++;
        Poly res(n);
        if (__int128_t(t) * k >= n) return res;
        n -= t * k;
        Poly g(n);
        T c = (*this)[k], ic = c.inv();
        rep(i, 0, n) g[i] = (*this)[i + k] * ic;
        g = g.log();
        for (auto &x : g) x *= t;
        g = g.exp();
        c = c.pow(t);
        rep(i, 0, n) res[i + t * k] = g[i] * c;
        return res;
    }
    vector<T> mult(const vector<T> &a, const vector<T> &b) const;
};

/**
 * @brief Formal Power Series (Arbitrary mod)
 */
#line 10 "Verify/LC_multivariate_convolution_cyclic.test.cpp"

// Multiplication backend for Poly<Fp>: arbitrary-modulus convolution.
template <>
vector<Fp> Poly<Fp>::mult(const vector<Fp> &a, const vector<Fp> &b) const {
    return ArbitraryMult<Fp>(a, b);
}

#line 2 "Math/miller.hpp"
// 64-bit modular multiplication in Montgomery form for an odd modulus
// below 2^62 (both asserted in set_mod). The modulus is static, i.e. shared
// by all m64 values.
struct m64 {
    using i64 = int64_t;
    using u64 = uint64_t;
    using u128 = __uint128_t;

    static u64 mod;
    static u64 r;   // inverse of mod modulo 2^64 (asserted: r * mod == 1)
    static u64 n2;  // 2^128 mod `mod`

    static u64 get_r() {
        u64 ret = mod;
        rep(_,0,5) ret *= 2 - mod * ret;
        return ret;
    }

    static void set_mod(u64 m) {
        assert(m < (1LL << 62));
        assert((m & 1) == 1);
        mod = m;
        n2 = -u128(m) % m;
        r = get_r();
        assert(r * mod == 1);
    }
    static u64 get_mod() { return mod; }

    u64 a;
    m64() : a(0) {}
    m64(const int64_t &b) : a(reduce((u128(b) + mod) * n2)){};

    static u64 reduce(const u128 &b) {
        return (b + u128(u64(b) * u64(-r)) * mod) >> 64;
    }
    u64 get() const {
        u64 ret = reduce(a);
        return ret >= mod ? ret - mod : ret;
    }
    m64 &operator*=(const m64 &b) {
        a = reduce(u128(a) * b.a);
        return *this;
    }
    m64 operator*(const m64 &b) const { return m64(*this) *= b; }
    // Comparisons normalize both sides because `a` may be >= mod.
    bool operator==(const m64 &b) const {
        return (a >= mod ? a - mod : a) == (b.a >= mod ? b.a - mod : b.a);
    }
    bool operator!=(const m64 &b) const {
        return (a >= mod ? a - mod : a) != (b.a >= mod ? b.a - mod : b.a);
    }
    m64 pow(u128 n) const {
        m64 ret(1), mul(*this);
        while (n > 0) {
            if (n & 1) ret *= mul;
            mul *= mul;
            n >>= 1;
        }
        return ret;
    }
};
typename m64::u64 m64::mod, m64::r, m64::n2;

// Primality test with a fixed witness set chosen by the size of n.
// Side effect: overwrites the static m64 modulus.
bool Miller(ll n) {
    if (n < 2 or (n & 1) == 0) return (n == 2);
    m64::set_mod(n);
    ll d = n - 1;
    while ((d & 1) == 0) d >>= 1;
    vector<ll> seeds;
    if (n < (1 << 30)) seeds = {2, 7, 61};
    else seeds = {2, 325, 9375, 28178, 450775, 9780504};
    for (auto &x : seeds) {
        if (n <= x) break;
        ll t = d;
        m64 y = m64(x).pow(t);
        while (t != n - 1 and y != 1 and y != n - 1) {
            y *= y;
            t <<= 1;
        }
        if (y != n - 1 and (t & 1) == 0) return 0;
    }
    return 1;
}

/**
 * @brief Miller-Rabin
 */
#line 2 "Utility/random.hpp"
// Random helpers backed by a mt19937_64 seeded from steady_clock.
namespace Random {
mt19937_64 randgen(chrono::steady_clock::now().time_since_epoch().count());
using u64 = unsigned long long;
u64 get() { return randgen(); }
template <typename T> T get(T L) { // [0,L]
    return get() % (L + 1);
}
template <typename T> T get(T L, T R) { // [L,R]
    return get(R - L) + L;
}
double uniform() { return double(get(1000000000)) / 1000000000; }
// Random string of n characters drawn from 'a'..'z'.
string str(int n) {
    string ret;
    rep(i, 0, n) ret += get('a', 'z');
    return ret;
}
template <typename Iter> void shuffle(Iter first, Iter last) {
    if (first == last) return;
    int len = 1;
    for (auto it = first + 1; it != last; it++) {
        len++;
        int j = get(0, len - 1);
        if (j != len - 1) iter_swap(it, first + j);
    }
}
// n distinct values from [L,R]: shuffles the whole range when n is at least
// half of it, otherwise draws with rejection.
template <typename T> vector<T> select(int n, T L, T R) { // [L,R]
    if (n * 2 >= R - L + 1) {
        vector<T> ret(R - L + 1);
        iota(ALL(ret), L);
        shuffle(ALL(ret));
        ret.resize(n);
        return ret;
    } else {
        unordered_set<T> used;
        vector<T> ret;
        while (SZ(used) < n) {
            T x = get(L, R);
            if (!used.count(x)) {
                used.insert(x);
                ret.push_back(x);
            }
        }
        return ret;
    }
}

// Shuffles the edge order and applies a random vertex permutation.
void relabel(int n, vector<pair<int, int>> &es) {
    shuffle(ALL(es));
    vector<int> ord(n);
    iota(ALL(ord), 0);
    shuffle(ALL(ord));
    for (auto &[u, v] : es) u = ord[u], v = ord[v];
}
// Random graph on n vertices with a random number of candidate edges.
template <bool directed, bool simple>
vector<pair<int, int>> genGraph(int n) {
    vector<pair<int, int>> cand, es;
    rep(u, 0, n) rep(v, 0, n) {
        if (simple and u == v) continue;
        if (!directed and u > v) continue;
        cand.push_back({u, v});
    }
    int m = get(SZ(cand));
    vector<int> ord;
    if (simple) ord = select(m, 0, SZ(cand) - 1);
    else {
        rep(_, 0, m) ord.push_back(get(SZ(cand) - 1));
    }
    for (auto &i : ord) es.push_back(cand[i]);
    relabel(n, es);
    return es;
}
// Random tree: vertex i attaches to a random earlier vertex, then relabel.
vector<pair<int, int>> genTree(int n) {
    vector<pair<int, int>> es;
    rep(i, 1, n) es.push_back({get(i - 1), i});
    relabel(n, es);
    return es;
}
}; // namespace Random

/**
 * @brief Random
 */
#line 4 "Math/pollard.hpp"
// Prime factors of n with multiplicity, in no particular order ({} for
// n <= 1). Uses Miller() for primality and randomized cycle finding.
vector<ll> Pollard(ll n) {
    if (n <= 1) return {};
    if (Miller(n)) return {n};
    if ((n & 1) == 0) {
        vector<ll> v = Pollard(n >> 1);
        v.push_back(2);
        return v;
    }
    for (ll x = 2, y = 2, d;;) {
        ll c = Random::get(2LL, n - 1);
        do {
            x = (__int128_t(x) * x + c) % n;
            y = (__int128_t(y) * y + c) % n;
            y = (__int128_t(y) * y + c) % n;
            d = __gcd(x - y + n, n);
        } while (d == 1);
        if (d < n) {
            vector<ll> lb = Pollard(d), rb = Pollard(n / d);
            lb.insert(lb.end(), ALL(rb));
            return lb;
        }
    }
}

/**
 * @brief Pollard-Rho
 */
#line 4 "Math/primitive.hpp"
// a^t mod m by binary exponentiation. The modulus goes through
// FastDiv(int), so m is narrowed to int.
ll mpow(ll a, ll t, ll m) {
    ll res = 1;
    FastDiv im(m);
    while (t) {
        if (t & 1) res = __int128_t(res) * a % im;
        a = __int128_t(a) * a % im;
        t >>= 1;
    }
    return res;
}
// Inverse of a modulo m via the extended Euclidean loop, in [0, m).
ll minv(ll a, ll m) {
    ll b = m, u = 1, v = 0;
    while (b) {
        ll t = a / b;
        a -= t * b;
        swap(a, b);
        u -= t * v;
        swap(u, v);
    }
    u = (u % m + m) % m;
    return u;
}
// Smallest x >= 1 with x^((p-1)/q) != 1 (mod p) for every prime factor q of
// p - 1.
ll getPrimitiveRoot(ll p) {
    vector<ll> ps = Pollard(p - 1);
    sort(ALL(ps));
    rep(x, 1, inf) {
        for (auto &q : ps) {
            if (mpow(x, (p - 1) / q, p) == 1) goto fail;
        }
        return x;
    fail:;
    }
    assert(0);
}
// Extended gcd: returns d = gcd(a, b) and sets p, q with a*p + b*q = d.
ll extgcd(ll a, ll b, ll &p, ll &q) {
    if (b == 0) {
        p = 1;
        q = 0;
        return a;
    }
    ll d = extgcd(b, a % b, q, p);
    q -= a / b * p;
    return d;
}
// Merges congruences x = vs[i] (mod ms[i]) into one {V, M}; returns {0, -1}
// when two of them are inconsistent.
pair<ll, ll> crt(const vector<ll> &vs, const vector<ll> &ms) {
    ll V = vs[0], M = ms[0];
    rep(i, 1, vs.size()) {
        ll p, q, v = vs[i], m = ms[i];
        if (M < m) swap(M, m), swap(V, v);
        ll d = extgcd(M, m, p, q);
        if ((v - V) % d != 0) return {0, -1};
        ll md = m / d, tmp = (v - V) / d % md * p % md;
        V += M * tmp;
        M *= md;
    }
    V = (V % M + M) % M;
    return {V, M};
}
// Discrete logarithm search for a^x == b (mod p); returns -1 on the failure
// paths. Products are formed in plain ll (assumes p is small enough that
// they do not overflow - TODO confirm).
ll ModLog(ll a, ll b, ll p) {
    ll g = 1;
    for (ll t = p; t; t >>= 1) g = g * a % p;
    g = __gcd(g, p);
    ll t = 1, c = 0;
    for (; t % g; c++) {
        if (t == b) return c;
        t = t * a % p;
    }
    if (b % g) return -1;
    t /= g, b /= g;
    ll n = p / g, h = 0, gs = 1;
    for (; h * h < n; h++) gs = gs * a % n;
    unordered_map<ll, ll> bs;
    for (ll s = 0, e = b; s < h; bs[e] = ++s) e = e * a % n;
    for (ll s = 0, e = t; s < n;) {
        e = e * gs % n, s += h;
        if (bs.count(e)) { return c + s - bs[e]; }
    }
    return -1;
}
// Searches for x with x^k == a (mod m); returns -1 when the existence check
// fails. NOTE(review): appears to assume m is prime - confirm.
ll mod_root(ll k, ll a, ll m) {
    if (a == 0) return k ? 0 : -1;
    if (m == 2) return a & 1;
    k %= m - 1;
    ll g = gcd(k, m - 1);
    if (mpow(a, (m - 1) / g, m) != 1) return -1;
    a = mpow(a, minv(k / g, (m - 1) / g), m);
    FastDiv im(m);
    auto _subroot = [&](ll p, int e, ll a) -> ll { // x^(p^e)==a(mod m)
        ll q = m - 1;
        int s = 0;
        while (q % p == 0) {
            q /= p;
            s++;
        }
        int d = s - e;
        ll pe = mpow(p, e, m),
           res = mpow(a, ((pe - 1) * minv(q, pe) % pe * q + 1) / pe, m), c = 1;
        while (mpow(c, (m - 1) / p, m) == 1) c++;
        c = mpow(c, q, m);
        map<ll, ll> mp;
        ll v = 1, block = sqrt(d * p) + 1,
           bs = mpow(c, mpow(p, s - 1, m - 1) * block % (m - 1), m);
        rep(i, 0, block + 1) mp[v] = i, v = v * bs % im;
        ll gs = minv(mpow(c, mpow(p, s - 1, m - 1), m), m);
        rep(i, 0, d) {
            ll err = a * minv(mpow(res, pe, m), m) % im;
            ll pos = mpow(err, mpow(p, d - 1 - i, m - 1), m);
            rep(j, 0, block + 1) {
                if (mp.count(pos)) {
                    res = res *
                          mpow(c,
                               (block * mp[pos] + j) * mpow(p, i, m - 1) %
                                   (m - 1),
                               m) %
                          im;
                    break;
                }
                pos = pos * gs % im;
            }
        }
        return res;
    };
    for (ll d = 2; d * d <= g; d++)
        if (g % d == 0) {
            int sz = 0;
            while (g % d == 0) {
                g /= d;
                sz++;
            }
            a = _subroot(d, sz, a);
        }
    if (g > 1) a = _subroot(g, 1, a);
    return a;
}
// Largest r with r^k <= a: starts from a floating point estimate and then
// corrects upward with saturating integer powers.
ull floor_root(ull a, ull k) {
    if (a <= 1 or k == 1) return a;
    if (k >= 64) return 1;
    if (k == 2) return sqrtl(a);
    constexpr ull LIM = -1;
    if (a == LIM) a--;
    auto mul = [&](ull &x, const ull &y) {
        if (x <= LIM / y) x *= y;
        else x = LIM;
    };
    auto pw = [&](ull x, ull t) -> ull {
        ull y = 1;
        while (t) {
            if (t & 1) mul(y, x);
            mul(x, x);
            t >>= 1;
        }
        return y;
    };
    ull ret = (k == 3 ? cbrt(a) - 1 : pow(a, nextafter(1 / double(k), 0)));
    while (pw(ret + 1, k) <= a) ret++;
    return ret;
}

/**
 * @brief Primitive Function
 */
#line 2 "FPS/multievalgeom.hpp"
// Evaluates the polynomial f at the m points a * w^i (i = 0..m-1).
// tri[i] = w^(i(i-1)/2) and itri[i] is its inverse, which turns the
// evaluation into a single polynomial product. w == 0 is handled separately
// (f(a) for i = 0 and f[0] afterwards).
template <typename T>
vector<T> MultievalGeomSeq(vector<T> &f, T a, T w, int m) {
    int n = f.size();
    vector<T> ret(m);
    if (w == 0) {
        T base = 1;
        rep(i, 0, n) ret[0] += base * f[i], base *= a;
        rep(i, 1, m) ret[i] = f[0];
        return ret;
    }
    vector<T> tri(n + m - 1), itri(n + m - 1);
    tri[0] = itri[0] = 1;
    T iw = w.inv(), pww = 1, pwiw = 1;
    for (int i = 1; i < n + m - 1; i++, pww *= w, pwiw *= iw) {
        tri[i] = tri[i - 1] * pww;
        itri[i] = itri[i - 1] * pwiw;
    }
    Poly<T> y(n), v(n + m - 1);
    T pwa = 1;
    for (int i = 0; i < n; i++, pwa *= a) { y[i] = f[i] * itri[i] * pwa; }
    rep(i, 0, n + m - 1) v[i] = tri[i];
    reverse(ALL(y));
    y *= v;
    rep(i, 0, m) ret[i] = y[n - 1 + i] * itri[i];
    return ret;
}

/**
 * @brief Multipoint Evaluation on Geometric Sequence
 */
#line 4 "Convolution/multivariatecyclic.hpp"
// Cyclic convolution of f and g over a grid with side lengths a[0..k),
// flattened so that axis x has stride prod(a[0..x)).
// Each axis is transformed by evaluating at the powers of an a[x]-th root of
// unity (requires a[x] | MO - 1, asserted), the spectra are multiplied
// pointwise, the transform is undone with the inverse root, and the result
// is scaled by 1 / n.
template <typename T>
vector<T> MultivariateCyclic(vector<T> f, vector<T> g, vector<int> &a) {
    int MO = T::get_mod();
    int k = a.size(), n = 1;
    for (auto &x : a) n *= x;
    T pr = getPrimitiveRoot(MO), ipr = T(pr).inv();

    int offset = 1;
    rep(x, 0, k) {
        assert((MO - 1) % a[x] == 0);
        T w = pr.pow((MO - 1) / a[x]);
        // i runs over the start index of every line along axis x.
        rep(i, 0, n) if (i % (offset * a[x]) < offset) {
            vector<T> na(a[x]), nb(a[x]);
            rep(j, 0, a[x]) {
                na[j] = f[i + offset * j];
                nb[j] = g[i + offset * j];
            }
            na = MultievalGeomSeq(na, T(1), w, a[x]);
            nb = MultievalGeomSeq(nb, T(1), w, a[x]);
            rep(j, 0, a[x]) {
                f[i + offset * j] = na[j];
                g[i + offset * j] = nb[j];
            }
        }
        offset *= a[x];
    }
    rep(i, 0, n) f[i] *= g[i];
    offset = 1;
    rep(x, 0, k) {
        T iw = ipr.pow((MO - 1) / a[x]);
        rep(i, 0, n) if (i % (offset * a[x]) < offset) {
            vector<T> na(a[x]);
            rep(j, 0, a[x]) na[j] = f[i + offset * j];
            na = MultievalGeomSeq(na, T(1), iw, a[x]);
            rep(j, 0, a[x]) f[i + offset * j] = na[j];
        }
        offset *= a[x];
    }
    T ninv = T(n).inv();
    rep(i, 0, n) f[i] *= ninv;
    return f;
}

/**
 * @brief Multivariate Convolution Cyclic
 */
#line 15 "Verify/LC_multivariate_convolution_cyclic.test.cpp"

// Reads p, k, the k side lengths and the two arrays, then prints the cyclic
// convolution, one value per line.
int main() {
    int p, k;
    read(p, k);
    Fp::set_mod(p);  // must precede any Fp arithmetic
    vector<int> a(k);
    read(a);
    int n = 1;
    for (auto &x : a) n *= x;
    vector<Fp> f(n), g(n);
    // Raw representatives are read directly; no reduction is applied here.
    rep(i, 0, n) read(f[i].v);
    rep(i, 0, n) read(g[i].v);
    auto ret = MultivariateCyclic(f, g, a);
    rep(i, 0, n) print(ret[i].v);
    return 0;
}